R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Set working directory
# setwd("D:\\BSE Semester 2\\Geospatial Data Science and Economic Spatial Models\\Assignments\\Assign 1\\Question 3")


# Load necessary packages
library(sf)
## Warning: package 'sf' was built under R version 4.4.2
## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library(geobr)
## Warning: package 'geobr' was built under R version 4.4.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
library(tidyr)
library(lwgeom)
## Warning: package 'lwgeom' was built under R version 4.4.2
## Linking to liblwgeom 3.0.0beta1 r16016, GEOS 3.12.2, PROJ 9.4.1
## 
## Attaching package: 'lwgeom'
## The following object is masked from 'package:sf':
## 
##     st_perimeter
library(sfheaders)
## Warning: package 'sfheaders' was built under R version 4.4.2
# Load the Brazilian municipal population table for 2009 from the .xls file.
# The first four sheet rows are skipped and the next row is used as the header.
df <- read_excel(
  path = "D:/BSE Semester 2/Geospatial Data Science and Economic Spatial Models/Assignments/Assign 1/Question 3/UF_Municipio.xls",
  skip = 4,
  col_names = TRUE
)
## New names:
## • `COD` -> `COD...2`
## • `COD` -> `COD...3`

Including Plots

You can also embed plots, for example:

# Load the 2010 boundary layers through geobr: states, meso regions and
# regions. NOTE(review): `meso` is not referenced by the code visible below.
state <- geobr::read_state(year = 2010)
## Using year/date 2010
meso <- geobr::read_meso_region(year = 2010)
## Using year/date 2010
region <- geobr::read_region(year = 2010)
## Using year/date 2010
# Coerce ESTIMADA (the population column) to numeric. Entries that cannot be
# parsed turn into NA during coercion and are then replaced by 0.
# NOTE(review): confirm that only non-data rows end up zeroed here.
df <- df %>%
  mutate(
    ESTIMADA = as.numeric(ESTIMADA),
    ESTIMADA = replace_na(ESTIMADA, 0)
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `ESTIMADA = as.numeric(ESTIMADA)`.
## Caused by warning:
## ! NAs introduced by coercion
# Aggregate the municipal populations by state code (`COD...2`, the key that
# is later joined against `code_state`), keeping the number of rows per state.
#
# Rows without a state code are dropped by key before aggregating instead of
# removing the last summary row by position, so a real state can never be
# discarded by accident.
# NOTE(review): assumes the sheet's trailing total row has an empty `COD...2`
# -- confirm against the .xls file.
df_grouped <- df %>%
  filter(!is.na(COD...2)) %>%
  group_by(COD...2) %>%
  summarise(
    Total_Estimada = sum(ESTIMADA, na.rm = TRUE),
    Count = n()
  )

# Attach the per-state population totals to the state geometries and compute
# each state's share of the total population, in percent.
# `na.rm = TRUE` keeps a single state without a population match from turning
# every share into NA; that state alone keeps an NA share.
df_merged <- state %>%
  left_join(df_grouped, by = c("code_state" = "COD...2")) %>%
  mutate(pop_share = Total_Estimada / sum(Total_Estimada, na.rm = TRUE) * 100)

# Collapse the regions into two groups, following the authors' aggregation:
# code 2 for "Centro Oeste" and "Norte", code 1 for every other region.
# The geometries of each group are unioned into a single shape and holes are
# then stripped from the result.
region <- region %>%
  dplyr::select(name_region, geom) %>%
  mutate(
    name_region = if_else(name_region %in% c("Centro Oeste", "Norte"), 2, 1)
  ) %>%
  group_by(name_region) %>%
  summarise(geom = st_union(geom)) %>%
  sfheaders::sf_remove_holes()

# Final map: states filled by population share (gradient limited to 0-25),
# with the aggregated region outlines drawn on top. The outline colours are
# taken from c("black", "red") and their legend is hidden.
ggplot(data = df_merged) +
  geom_sf(mapping = aes(fill = pop_share), color = "white") +
  geom_sf(
    data = region,
    mapping = aes(color = factor(name_region)),
    fill = NA,
    linewidth = 1
  ) +
  scale_fill_gradientn(
    name = "Pop. Share",
    colors = c("lightblue", "blue", "darkblue"),
    limits = c(0, 25)
  ) +
  scale_color_manual(values = c("black", "red")) +
  guides(color = "none")