Plotting South Africa figure (exercise 1)

Source: Metteta, E.R., Irrigation dams, water and infant mortality: Evidence from South Africa, Journal of Development Economics (2018).
Source: Metteta, E.R., Irrigation dams, water and infant mortality: Evidence from South Africa, Journal of Development Economics (2018).

Data sources, loading libraries and loading the data:

We downloaded the data from the following websites:

rm(list = ls())

library(sf)
library(spData) 
library(tidyverse) 
library(ggplot2)

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment")

unzip("SouthAfrica/RegisteredDamsOct2024.kmz",)

spatial_data <- st_read("doc.kml")
## Reading layer `GEarth dams' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\doc.kml' 
##   using driver `KML'
## Simple feature collection with 5744 features and 2 fields
## Geometry type: POINT
## Dimension:     XYZ
## Bounding box:  xmin: 17.5755 ymin: -34.67222 xmax: 33.65056 ymax: -18.81806
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84
sf.world <- world
sf.SouthAfrica <- sf.world%>% filter(sf.world$iso_a2=="ZA")

sf.ZA <- st_read(dsn = "SouthAfrica/za_shp/za.shp")
## Reading layer `za' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\SouthAfrica\za_shp\za.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 9 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 16.46998 ymin: -46.96575 xmax: 37.97779 ymax: -22.12645
## Geodetic CRS:  WGS 84
sf.ZA.distric <- st_read(dsn = "SouthAfrica/South_African_District/South_African_Education_District_Boundaries.shp")
## Reading layer `South_African_Education_District_Boundaries' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\SouthAfrica\South_African_District\South_African_Education_District_Boundaries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 86 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1831416 ymin: -4141368 xmax: 3661533 ymax: -2526543
## Projected CRS: WGS 84 / Pseudo-Mercator
sf.ZA.distric <- sf.ZA.distric %>%
  select(EIDISTRICT,PROVINCE, Area_sq_km,geometry)

rivers <- st_read(dsn = "SouthAfrica/allrivers/wriall500.shp")
## Reading layer `wriall500' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\SouthAfrica\allrivers\wriall500.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 10352 features and 26 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 15.98333 ymin: -34.76999 xmax: 33.99925 ymax: -19.79143
## Geodetic CRS:  Cape
rivers <- st_transform(rivers, st_crs(sf.ZA.distric))

Plotting the raw data (country (polygons), dam locations(points) and rivers (lines))

In this section we plot all the downloaded data without carrying any operation or handling of the data. This is useful to understand how the data looks and what information is available. During this step, we notice that the river and dam data accounts for information outside the country of South Africa.

ggplot() +
  geom_sf(data = sf.ZA.distric) +
  geom_sf(data = spatial_data)+
  geom_sf(data=rivers)

Data processing and cleaning

In this section, we start by identifying the rivers and dams that are outside the country and filter them out.

Further, as we do not account gradient* data from the rivers, we calculate the length of each river for further analysis.

#calculate the intersection of rivers and districts

rivers_in_regions <- st_intersection(rivers, sf.ZA.distric)

# Calculate the length of each river segment within a region
rivers_in_regions <- rivers_in_regions %>%
  mutate(segment_length = st_length(geometry)/1000)

# Calculate the total length of each river
total_river_lengths <- rivers %>%
  group_by(NAME) %>%
  summarize(total_length = sum(LENGTH_KM))%>%mutate(total_river_length = st_length(geometry)/1000)
# Join total lengths back to the data
rivers_in_regions <- rivers_in_regions %>%
  left_join(total_river_lengths %>% st_drop_geometry(), by = "NAME")

# Calculate the percentage of the river within each region
rivers_in_regions <- rivers_in_regions %>%
  mutate(percentage_in_region = (as.numeric(segment_length) / as.numeric(total_river_length)) * 100)

# Summarize percentage by region (optional)
region_summary <- rivers_in_regions %>%
  group_by(EIDISTRICT) %>%
  summarize(total_km_rivers = sum(segment_length))

region_summary <- region_summary %>%st_drop_geometry()

region_summary <- region_summary %>%
  distinct() 

# Merge
sf.ZA.distric <- sf.ZA.distric %>%
  left_join(region_summary, join_by(EIDISTRICT))

sf.ZA.distric$porcentage_rivers <- as.numeric(sf.ZA.distric$total_km_rivers) / sf.ZA.distric$Area_sq_km


##This excludes rivers that outside South Africa and fills in color the amount of km of rivers in each region.
ggplot() +
  geom_sf(data = sf.ZA.distric,  aes(fill = as.numeric(total_km_rivers)), color = "black") +  # Add region layer
  geom_sf(data = rivers_in_regions, color = "lightblue", size = 0.5) +  # Add river layer
  labs(title = "Districts and Rivers")

Then we decided to calculate the percentage of the river that is within each river (standardized by each region size)

*River gradient variable is constructed using both perennial and seasonal river segments. However, larger dams, which are more likely to have effects that cross district boundaries, are typically only placed on perennial rivers; thus, the average river gradient variable for upstream large dams is constructed using only perennial rivers. (Metteta, 2018)

ggplot() +
  geom_sf(data = sf.ZA.distric, aes(fill = porcentage_rivers))+
  geom_sf(data = spatial_data)+
  geom_point(size = 0.01) +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Percentage Rivers") +
  labs(title = "Percentage of Rivers by District and Dam Locations") +
  theme_minimal()

Plotting Ethiopia figure (exercise 2)

#Setting up the environment
rm(list = ls()) #cleaning up the code environment
graphics.off() # cleans all the plots

#Loading required libraries, downloading required data from online sources, and loading the data:
library(sf) # simple features' library
library(spData) # library of spatial datasets
library(tidyverse) # dplyr, ggplot, ...
library(ggplot2)
library(readxl)

#Ethipia admin boundaries (Level 3- woredas): https://data.humdata.org/dataset/cod-ab-eth
#population density by woredas: https://data.humdata.org/dataset/cod-ps-eth
#major roads: https://data.humdata.org/dataset/hotosm_eth_roads
#high voltage electricity grid: https://datacatalog.worldbank.org/search/dataset/0039311
#ERSS enumeration areas (rural; surveyed twice): https://microdata.worldbank.org/index.php/catalog/2053 

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Paper 2_Ethiopia")

# Load the world map
gmsf.world <- world

# Filter for Ethiopia's geometry
sf.Ethiopia <- gmsf.world%>% filter(iso_a2=="ET")

#reading woreda shapefiles
sf.Ethiopia_subadmin <- st_read("eth_admbnda_adm3_csa_bofedb_2021.shp")
## Reading layer `eth_admbnda_adm3_csa_bofedb_2021' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Paper 2_Ethiopia\eth_admbnda_adm3_csa_bofedb_2021.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1082 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 32.9918 ymin: 3.40667 xmax: 47.98824 ymax: 14.84548
## Geodetic CRS:  WGS 84
#map with admin boundaries
ggplot(data = sf.Ethiopia) +
  geom_sf(data = sf.Ethiopia_subadmin, color = "grey") + # Ethiopian woredas
  coord_sf(crs = st_crs(4326), expand = FALSE, datum = NA) +  # Force flat projection
  theme_minimal() +
  labs(title = "Ethiopia: Woredas (Level 3 Subnational Boundaries)") +
  theme(
    panel.grid = element_blank()  # Remove gridlines entirely
  )

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Paper 2_Ethiopia")

#opening the population file
pop_data <- read_excel("eth_admpop_2023.xlsx", sheet = 5)

#renaming ID column for merge & executing the left-join merge
pop_data <- pop_data %>%
  rename(ADM3_PCODE = admin3Pcode)
sf.Ethiopia_subadmin <- sf.Ethiopia_subadmin %>%
  left_join(pop_data %>% select(ADM3_PCODE, Total), by = "ADM3_PCODE")

#population density: It is calculated by dividing the population in each woreda by the area of that woreda. To do that, we begin by changing the map projection to CRS 32637 which allows us to use the 'shape area' already available within the "sf.Ethiopia_subadmin" file to obtain its corresponding measure in km2. Thereafter, I perform a log transformation on the calculated density to reduce the impact of outliers, and plot the data effectively.
sf.Ethiopia_subadmin <- st_transform(sf.Ethiopia_subadmin, crs = 32637)
sf.Ethiopia_subadmin <- sf.Ethiopia_subadmin %>%
  mutate(area_km2 = as.numeric(st_area(geometry)) / 1e6)  # Convert from m² to km²
sf.Ethiopia_subadmin <- sf.Ethiopia_subadmin %>%
  mutate(population_density = Total / area_km2)
sf.Ethiopia_subadmin <- sf.Ethiopia_subadmin %>%
  mutate(log_population_density = log1p(population_density))  # Add 1 to avoid log(0)

#map with population density and admin boundaries
ggplot(data = sf.Ethiopia_subadmin) +
  geom_sf(aes(fill = log_population_density), color = "grey") +
  scale_fill_gradient(name = "Log(Population Density)", low = "#d0e7f2", high = "#2a5674") +
  theme_minimal() +
  labs(title = "Ethiopia: Population Density by Woredas (Level 3 Subnational Boundaries)") +
  theme(
    panel.grid = element_blank())

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Paper 2_Ethiopia")


#reading major roads shapefiles
sf.Ethiopia_roads <- st_read("hotosm_eth_roads_lines_shp/hotosm_eth_roads_lines_shp.shp")
## Reading layer `hotosm_eth_roads_lines_shp' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Paper 2_Ethiopia\hotosm_eth_roads_lines_shp\hotosm_eth_roads_lines_shp.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 420912 features and 18 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 32.9902 ymin: 3.410445 xmax: 47.98158 ymax: 14.8554
## Geodetic CRS:  WGS 84
unique(sf.Ethiopia_roads$highway)
##  [1] "tertiary"       "residential"    "unclassified"   "motorway_link" 
##  [5] "primary"        "service"        "track"          "escape"        
##  [9] "path"           "secondary"      "footway"        "construction"  
## [13] "trunk_link"     "steps"          "primary_link"   "trunk"         
## [17] "secondary_link" "tertiary_link"  "road"           "living_street" 
## [21] "motorway"       "bridleway"      "pedestrian"     "platform"      
## [25] "corridor"       "services"       "proposed"       "emergency_bay"
# Define major highways: There are 28 types of roads in Ethiopia; however, not all are important for our purposes since the paper only shows the most important roads. Thus, we filter out all the roads except "motorway" and "trunk”. 
major_highways <- c("motorway", "trunk")

# Filter the roads dataset
sf.Ethiopia_roads <- sf.Ethiopia_roads %>%
  filter(highway %in% major_highways)
unique(sf.Ethiopia_roads$highway)
## [1] "trunk"    "motorway"
#map with population density, admin boundaries, and major roads
ggplot(data = sf.Ethiopia_subadmin) +
  geom_sf(aes(fill = log_population_density), color = "grey") +
  geom_sf(data = sf.Ethiopia_roads, color = "black", size = 0.3) +  # Add major roads
  scale_fill_gradient(name = "Log(Population Density)", low = "#d0e7f2", high = "#2a5674") +
  theme_minimal() +
  labs(title = "Ethiopia: Population Density by Woredas (Level 3 Subnational Boundaries) and Major Roads") +
  theme(
    panel.grid = element_blank())

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Paper 2_Ethiopia")


#reading electricity transmission shapefiles
sf.Ethiopia_elec <- st_read("ethiopia-electricity-transmission-network/Ethiopia Electricity Transmission Network.shp")
## Reading layer `Ethiopia Electricity Transmission Network' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Paper 2_Ethiopia\ethiopia-electricity-transmission-network\Ethiopia Electricity Transmission Network.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 100 features and 8 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 34.5333 ymin: 3.613251 xmax: 43.58116 ymax: 14.27694
## Geodetic CRS:  WGS 84
unique(sf.Ethiopia_elec$VOLTAGE_KV)
## [1]  66 132 230 330  45 400
# Define major transmission lines: There are 6 types of transmission lines in Ethiopia of various kVs; however, not all are important for our purposes since the paper only shows the most important lines. Thus, we filter out all the transmission lines except 230 kV, 330 kV, and 400 kV.
major_lines <- c("230", "330", "400")

# Filter the roads dataset
sf.Ethiopia_elec <- sf.Ethiopia_elec %>%
  filter(VOLTAGE_KV %in% major_lines)
unique(sf.Ethiopia_elec$VOLTAGE_KV)
## [1] 230 330 400
#map with population density, admin boundaries, major roads, and major electricity transmission lines
ggplot(data = sf.Ethiopia_subadmin) +
  geom_sf(aes(fill = log_population_density), color = "grey") +
  geom_sf(data = sf.Ethiopia_roads, color = "black", size = 0.3) +  # Add major roads
  geom_sf(data = sf.Ethiopia_elec, color = "red", size = 0.5) +  # Add electricity infrastructure in red
  scale_fill_gradient(name = "Log(Population Density)", low = "#d0e7f2", high = "#2a5674") +
  theme_minimal() +
  labs(title = "Ethiopia: Population Density by Woredas (Level 3 Subnational Boundaries),\n Major Roads, and Major Transmission Lines") +
  theme(
    panel.grid = element_blank())

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Paper 2_Ethiopia")

#ERSS Survey coordinates: We obtain the coordinates of the ERSS enumeration centers from the appropriate csv files. Thereafter, we add them as a layer on our map, per the paper's requirements.
ERSS <- read.csv("pub_eth_householdgeovariables_y1.csv")
ERSS_sf <- st_as_sf(ERSS, coords = c("LON_DD_MOD", "LAT_DD_MOD"), crs = 4326)

#map with population density, admin boundaries, major roads, major electricity transmission lines, and ERSS sites
ggplot(data = sf.Ethiopia_subadmin) +
  geom_sf(aes(fill = log_population_density), color = "grey") +
  geom_sf(data = sf.Ethiopia_roads, aes(color = "Major Roads"), size = 0.3) +
  geom_sf(data = sf.Ethiopia_elec, aes(color = "Transmission Lines"), size = 0.5) +
  geom_sf(data = ERSS_sf, aes(color = "ERSS Locations"), size = 1, alpha = 0.6) +
  scale_fill_gradient(name = "Log(Population Density)", low = "#d0e7f2", high = "#2a5674") +
  scale_color_manual(
    name = "Legend",
    values = c(
      "Major Roads" = "black",           # Black for roads
      "Transmission Lines" = "red",     # Red for transmission lines
      "ERSS Locations" = "black"              # Blue for points
    )
  ) +
  theme_minimal() +
  labs(title = "Ethiopia: Population Density by Woredas (Level 3 Subnational Boundaries),\n Major Roads, Major Transmission Lines, and ERSS Survey Sites") +
  theme(
    panel.grid = element_blank())

Plotting Brazil figure (exercise 3)

rm(list = ls())

library(patchwork)
library(geobr)
library(ggplot2)
library(dplyr)
library(readxl)
library(tidyr)
library(lubridate)
library(sf)
library(readr)

# Download Brazil's regional and meso-regional shapefile from the geobr package.
brazil_regions <- read_region(year = 2010)
brazil_meso <- read_meso_region(year = 2010)

# Downloading Population Data. The data was found on http://www.ipeadata.gov.br/Default.aspx. 
# We originally looked for it on the IBGE website since that is what the author cited, but 
# the meso-region level data was not available on that site. IBGE only had State-Level population
# data going back to 1950. After downloading the data from IPEA, we cleaned the data on excel to make it more
# readable and easier to manipulate in R. While uploading to R, skip the first row.

population_meso <- read_excel("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/BraziP3/population_meso.xlsx",range = "A2:F139")

# Next, we reshaped the population data from wide to long format to make it easier to plot
meso_long <- population_meso %>%
  pivot_longer(
    cols = c("1950", "1980", "2010"),
    names_to = "year",           
    values_to = "population" 
  ) %>%
  mutate(year = as.numeric(year))

# Then, we mutated the mesoregion codes in meso_long to match brazil_meso, as.integer, because the merge
# wasn't working otherwise.
meso_long$code <- as.integer(meso_long$code)

# Merged population data with meso-region boundaries
brazil_meso_pop <- meso_long %>%
  left_join(brazil_meso, by = c("code" = "code_meso"))

# Created a copy of brazil_regions to test out new plotting strategy for the red 
# border that doesn't depend on merging regional geometries into the brazil_meso_pop dataframe.
br_regions <- brazil_regions

# Filter regions 1 and 5, group by west_dummy
br_regions <- br_regions %>%
  mutate(west_dummy = if_else(code_region %in% c(1, 5), 1, 0))

# Dissolve the geometry of regions where west_dummy = 1, and create new dataframe for west_region
west_region <- br_regions %>%
  filter(west_dummy == 1) %>%            
  summarise(geom = st_union(geom)) 

# The plot was not working, so we had to check if the dataframes were in the correct sf format.
# Using st_geometry to check if brazil_meso_pop2 and west_region contain a geometry column.
#st_geometry(brazil_meso_pop)
#st_geometry(west_region)

# brazil_meso_pop wasn't reading as an sf, so we converted the dataframe to an sf object
# and setting CRS to default.
brazil_meso_pop <- st_as_sf(brazil_meso_pop, crs = 4326)  

# Check if the geometry is now recognized
st_geometry(brazil_meso_pop)
## Geometry set for 411 features 
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.99045 ymin: -33.75208 xmax: -28.83594 ymax: 5.271841
## Geodetic CRS:  WGS 84
## First 5 geometries:
# Plotting the data

# Created a blue gradient that sort of matches the color scale in the reference paper. 
# We couldn't find a way to match the color scale perfectly.
blue_gradient <- scale_fill_gradientn(
  colors = c("white", "lightblue", "blue"),
  name = "Pop. Share (log10)"
)

# Created plots for each year with the legend below. Whenever we created all three maps in one plot,
# there was only one legend off to the right. In order to get a legend under each map, we had to plot
# them separately. Furthermore, we could not get the red line to go thicker even though the size is 20. 
# The plots are built using the brazil_meso_pop merged data, the west_region data with the first and
# fifth regions merged into one (for the red border), the panel grid deleted, and the legend positioned
# at the bottom. 

plot_1950 <- ggplot(data = brazil_meso_pop %>% filter(year == 1950)) +
  geom_sf(aes(fill = log10(population)), color = "black") +
  blue_gradient +
  geom_sf(data = west_region, color = "red", fill = NA, size = 20) +
  labs(title = "1950", fill = "Pop. Share (log10)") +  # Legend title
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom"  # Move legend below the map
  )

plot_1980 <- ggplot(data = brazil_meso_pop %>% filter(year == 1980)) +
  geom_sf(aes(fill = log10(population)), color = "black") +
  blue_gradient +
  geom_sf(data = west_region, color = "red", fill = NA, size = 20) +
  labs(title = "1980", fill = "Pop. Share (log10)") +  # Legend title
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )

plot_2010 <- ggplot(data = brazil_meso_pop %>% filter(year == 2010)) +
  geom_sf(aes(fill = log10(population)), color = "black") +
  blue_gradient +
  geom_sf(data = west_region, color = "red", fill = NA, size = 20) +
  labs(title = "2010", fill = "Pop. Share (log10)") +  # Legend title
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )


# Using the patchwork library, combined the plots in a single line and add title/caption. 
# The title size was increased and emboldened to match the paper when zoomed in.The caption is 
# the same as the paper except we cite IPEA as well since this is where the population data was found.
final_plot <- (plot_1950 + plot_1980 + plot_2010) +
  plot_layout(ncol = 3) + 
  plot_annotation(
    title = "The Spatial Distribution of the Brazilian Population between 1950 and 2010",
    caption = "Notes: The figure shows the division of Brazil into meso-regions, which is the geographic unit used in our 
    analysis. The red contour shows the meso-regions we classify as the West. The West incorporates the North 
    and the Central-West, two of the five official regions defined by the Brazilian statistical bureu (IBGE and IPEA)."
  ) & 
  theme(
    plot.title = element_text(
      size = 20,
      hjust = 0.5, 
      face = "bold"   
    ),
    plot.caption = element_text(size = 12, hjust = 0.5, face = "bold")
  )

# Display the final plot. It is best viewed in R's Zoom feature.
final_plot

Plotting Vietnam figure (exercise 4)

#Country boarder: https://gadm.org/data.html
#Vietnam road data unfiltered:https://www.globio.info/download-grip-dataset
library(sf)
library(spData) 
library(tidyverse) 
library(ggplot2)

rm(list = ls())

getwd()
## [1] "C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/SouthAfrica"
setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Vietnamv2")


#Loading the data: country_boarder - GDAM, road_types_asia - GLOBIO
country_boarder <- st_read("gadm41_VNM_0.shp")
## Reading layer `gadm41_VNM_0' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Vietnamv2\gadm41_VNM_0.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 102.1446 ymin: 8.381355 xmax: 109.4692 ymax: 23.39269
## Geodetic CRS:  WGS 84
road_types_asia <- st_read("GRIP4_region6.shp")
## Reading layer `GRIP4_region6' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Vietnamv2\GRIP4_region6.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5244510 features and 10 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 60.49997 ymin: -10.93716 xmax: 155.8754 ymax: 53.94999
## Geodetic CRS:  WGS 84
#The road data used is from the year 2014 since data from other sources and from the year 2010 didn't have road types specified. 
#(Other datasets used that did not contain sufficient road type information: OpenDevelopment Mekong https://data.opendevelopmentmekong.net/dataset/giao-thng-vit-nam, NASA database: https://www.earthdata.nasa.gov/data/catalog/sedac-ciesin-sedac-groads-v1-1.00)

#Some initial information 
print(country_boarder) # Summary of the shapefile
## Simple feature collection with 1 feature and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 102.1446 ymin: 8.381355 xmax: 109.4692 ymax: 23.39269
## Geodetic CRS:  WGS 84
##   GID_0 COUNTRY                       geometry
## 1   VNM Vietnam MULTIPOLYGON (((106.0044 9....
plot(st_geometry(country_boarder)) # Initial plot of the boarder

head(road_types_asia) 
## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 80.52085 ymin: 5.94279 xmax: 80.60061 ymax: 5.94674
## Geodetic CRS:  WGS 84
##   GP_RTP GP_REX GP_RAV GP_RRG GP_RCY GP_RSE GP_RSI GP_RSY gp_gripreg
## 1      5      1      2     18    144      2     39      0          6
## 2      5      1      2     18    144      2     39      0          6
## 3      5      1      2     18    144      2     39      0          6
## 4      5      1      2     18    144      2     39      0          6
## 5      5      1      2     18    144      2     39      0          6
## 6      5      1      2     18    144      2     39      0          6
##     Shape_Leng                       geometry
## 1 0.0010140280 MULTILINESTRING ((80.59848 ...
## 2 0.0105892924 MULTILINESTRING ((80.60005 ...
## 3 0.0023762227 MULTILINESTRING ((80.52477 ...
## 4 0.0017029880 MULTILINESTRING ((80.52477 ...
## 5 0.0009459547 MULTILINESTRING ((80.59826 ...
## 6 0.0043954192 MULTILINESTRING ((80.52085 ...
#Checking the CRS of the data
st_crs(country_boarder)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]
st_crs(road_types_asia)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]
#Filtering roads for Vietnam only
roads_vietnam_GRIP <- road_types_asia[st_intersects(road_types_asia, country_boarder, sparse = FALSE), ]

#Saving the .shp file for Vietnam (saving computational power)
#st_write(roads_vietnam_GRIP, "roads_vietnam_GRIP.shp")

#Removing the file for the whole asia
rm(road_types_asia)


#Plotting the roads and the map in a simple ggplot
ggplot() +
  geom_sf(data = country_boarder, fill = "white", color = "black") +
  geom_sf(data = roads_vietnam_GRIP, color = "red", size = 0.5) +         
  theme_void() +                                          
  labs(title = "Country Map with Roads")

#Cropping some of the roads
roads_vietnam_clipped <- st_intersection(roads_vietnam_GRIP, country_boarder)

# Defining the color palette for the roads
road_colors <- c(
  "Highways" = "blue",
  "Primary roads" = "darkgreen",
  "Secondary roads" = "red",
  "Tertiary roads" = "gold",
  "Local roads" = "yellow"
)


#Plotting the map
ggplot() +
  geom_sf(
    data = roads_vietnam_clipped, #First plotting the roads
    aes(
      color = case_when(  #Specifying the colours
        GP_RTP == 1 ~ "Highways",
        GP_RTP == 2 ~ "Primary roads",
        GP_RTP == 3 ~ "Secondary roads",
        GP_RTP == 4 ~ "Tertiary roads",
        GP_RTP == 5 ~ "Local roads",
        TRUE ~ "Unknown"
      ),
      size = case_when( #adjusting the thickness of the roads to highlight the importance of the highways
        GP_RTP == 1 ~ 1.2,  # Highways
        GP_RTP == 2 ~ 1.0,  # Primary roads
        GP_RTP == 3 ~ 0.7,  # Secondary roads
        GP_RTP == 4 ~ 0.5,  # Tertiary roads 
        GP_RTP == 5 ~ 0.3,  # Local roads
        TRUE ~ 0.5
      )
    ),
    show.legend = c(size = FALSE)
  ) +
  geom_sf(data = country_boarder, fill = NA, color = "black", size = 0.7) + # plotting the country boarder on top
  scale_color_manual(values = road_colors) +
  coord_sf(expand = FALSE) + 
  theme_void() +
  theme(
    plot.margin = margin(0.5, 0.5, 1, 1, "cm"),
    plot.background = element_rect( #adding the background box
      fill = "NA",
      colour = "black")
  )+
  labs(
    title = "Road Map 2014",
    color = "Road Types"
  )

#Adding a map for 2007
##https://github.com/globalmaps/gmvn10/blob/master/roadl.dbf
#Source:"Global Map of Vietnam ©ISCGM/ Department of Survey and Mapping, Ministry of Nutural Resources and Environment -Vietnam"
setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Vietnamv2")

#Loading the Data
road_types_github <- st_read("roadl.shp")
## Reading layer `roadl' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Vietnamv2\roadl.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1457 features and 17 fields
## Geometry type: LINESTRING
## Dimension:     XYZ
## Bounding box:  xmin: 102.7777 ymin: 8.760571 xmax: 109.3931 ymax: 23.27846
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84
#Since we have smaller amount of types available, we pick the colours again
road_colors_2 <- c(
  "Local Roads" = "yellow",
  "Primary roads" = "darkgreen",
  "Secondary roads" = "red",
  "Unknown" = "grey"
)

#And we plot same as before, using the data from 2007
ggplot() +
  geom_sf(
    data = road_types_github,
    aes(
      color = case_when(
        rtt_descri == "Other" ~ "Local Roads",
        rtt_descri == "Primary Route" ~ "Primary roads",
        rtt_descri == "Secondary Route" ~ "Secondary roads",
        TRUE ~ "Unknown"
      )
    ),
    show.legend = c(size = FALSE)
  ) +
  geom_sf(data = country_boarder, fill = NA, color = "black", size = 0.7) +
  scale_color_manual(values = road_colors_2) +
  coord_sf(expand = FALSE) + 
  theme_void() +
  theme(
    plot.margin = margin(0.5, 0.5, 1, 1, "cm"),
    plot.background = element_rect(
      fill = "NA",
      colour = "black")
  ) +
  labs(
    title = "Road Map 2007",
    color = "Road Types"
)

library(patchwork)

#Plotting both graphs next to each other: 
plot_2007 <- ggplot() +
  geom_sf(
    data = road_types_github,
    aes(
      color = case_when(
        rtt_descri == "Other" ~ "Local Roads",
        rtt_descri == "Primary Route" ~ "Primary roads",
        rtt_descri == "Secondary Route" ~ "Secondary roads",
        TRUE ~ "Unknown"
      )
    ),
    show.legend = c(size = FALSE)
  ) +
  geom_sf(data = country_boarder, fill = NA, color = "black", size = 0.7) +
  scale_color_manual(values = road_colors) +
  coord_sf(expand = FALSE) + 
  theme_void() +
  theme(
    plot.margin = margin(0.5, 0.5, 1, 1, "cm"),
    plot.background = element_rect(
      fill = "NA",
      colour = "black")
  ) +
  labs(
    title = "Road Map 2007",
    color = "Road Types"
  )

plot_2014 <- ggplot() +
  geom_sf(
    data = roads_vietnam_clipped, # First plotting the roads
    aes(
      color = case_when(  # Specifying the colours
        GP_RTP == 1 ~ "Highways",
        GP_RTP == 2 ~ "Primary roads",
        GP_RTP == 3 ~ "Secondary roads",
        GP_RTP == 4 ~ "Tertiary roads",
        GP_RTP == 5 ~ "Local roads",
        TRUE ~ "Unknown"
      ),
      size = case_when( # Adjusting the thickness of the roads
        GP_RTP == 1 ~ 1.2,  # Highways
        GP_RTP == 2 ~ 1.0,  # Primary roads
        GP_RTP == 3 ~ 0.7,  # Secondary roads
        GP_RTP == 4 ~ 0.5,  # Tertiary roads 
        GP_RTP == 5 ~ 0.3,  # Local roads
        TRUE ~ 0.5
      )
    ),
    show.legend = c(size = FALSE)
  ) +
  geom_sf(data = country_boarder, fill = NA, color = "black", size = 0.7) + # Plotting the country border
  scale_color_manual(values = road_colors) +
  coord_sf(expand = FALSE) + 
  theme_void() +
  theme(
    plot.margin = margin(0.5, 0.5, 1, 1, "cm"),
    plot.background = element_rect( # Adding the background box
      fill = "NA",
      colour = "black")
  ) +
  labs(
    title = "Road Map 2014",
    color = "Road Types"
  )

# Combine the plots side by side using patchwork
combined_plot <- plot_2007 | plot_2014

# Print the combined plot
combined_plot

Plotting Brazil figure (exercise 5)

Morten, Melanie, and Jaqueline Oliveira. 2024. “The Effects of Roads on Trade and Migration: Evidence from a Planned Capital City.” American Economic Journal: Applied Economics, 16 (2): 389–421.
Morten, Melanie, and Jaqueline Oliveira. 2024. “The Effects of Roads on Trade and Migration: Evidence from a Planned Capital City.” American Economic Journal: Applied Economics, 16 (2): 389–421.

Data sources, loading libraries and loading the data:

#download Brazil shape data from: https://purl.stanford.edu/kx233jf3889
#download Brazil point data from:https://searchworks.stanford.edu/view/tc137vp4426
#download Brazil road data from: https://datacatalog.worldbank.org/search/dataset/0038536
#doanload Brazil highways data (2005) from: https://www.ibge.gov.br/en/geosciences/maps/brazil-geographic-networks-mapasdobrasil/18884-transportation-logistics.html?edicao=25946&t=downloads

rm(list = ls())


library(sf)
library(spData)
library(tidyverse)
library(igraph)


setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment")

sf.BZ <- st_read(dsn = "Brazil/Brazilshape/04_limiteestadual1970.shp")
## Reading layer `04_limiteestadual1970' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Brazil\Brazilshape\04_limiteestadual1970.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2205422 ymin: -3735996 xmax: 2399861 ymax: 586382
## Projected CRS: World_Polyconic
sf.BZcapitals <- st_read(dsn = "Brazil/CapitalsandCities/02_capitalestadual1991.shp")
## Reading layer `02_capitalestadual1991' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Brazil\CapitalsandCities\02_capitalestadual1991.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 26 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -1513813 ymin: -3326824 xmax: 2113446 ymax: 313927.8
## Projected CRS: World_Polyconic

Plotting the raw data (country and states (polygons), and roads (lines)

ggplot()+
  geom_sf(data=sf.BZ)+
  geom_sf(data=sf.BZcapitals)

sf.BZhighways <- st_read(dsn = "C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Brazil/highways/RODOVIAS_ESTADUAIS_SRE2.shp")
## Reading layer `RODOVIAS_ESTADUAIS_SRE2' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Brazil\highways\RODOVIAS_ESTADUAIS_SRE2.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 15589 features and 16 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -73.80197 ymin: -33.71964 xmax: -34.79332 ymax: 4.925941
## Geodetic CRS:  SIRGAS 2000
sf.BZhighways2005 <- st_read(dsn = "C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment/Brazil/2005/rodovia_2005.shp")
## Reading layer `rodovia_2005' from data source 
##   `C:\Users\Juan Acuña\Desktop\BSE\T2\Geospatial Data Science\Assignment\Brazil\2005\rodovia_2005.shp' 
##   using driver `ESRI Shapefile'
## replacing null geometries with empty geometries
## Simple feature collection with 6305 features and 3 fields (with 71 geometries empty)
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -2543016 ymin: -3873737 xmax: 2105837 ymax: 622837.4
## Projected CRS: World_Polyconic
ggplot()+
  geom_sf(data=sf.BZ)+
  geom_sf(data=sf.BZhighways2005)

sf.BZhighways2005 <- st_transform(sf.BZhighways2005, st_crs(sf.BZ))

We notice that there are international roads that should be partially deleted.

#delete internatinal roads from the data

sf.BZhighways2005 <- sf.BZhighways2005%>%filter(sf.BZhighways2005$SITUACAO!="Internacional")
ggplot()+
  geom_sf(data=sf.BZ)+
  geom_sf(data=sf.BZhighways2005)

##Create a dummy to determine which roads go through Brazilia (in and out)

Brasiliashape <- sf.BZ[sf.BZ$nome=="Brasília",]

Brasiliashape <- st_transform(Brasiliashape, st_crs(sf.BZhighways))

intersects <- st_intersects(sf.BZhighways, Brasiliashape, sparse = FALSE)

sf.BZhighways$dummy <- ifelse(intersects[, 1], 1, 0)

##Filter if the dummy=0

#sf.BZhighwaysBrasilia <- sf.BZhighways%>%filter(sf.BZhighways$dummy==0)


brazil_states <- sf.BZ

# 2. Capitals dataset (sf.BZCapitals is already available)
capitals_sf <- sf.BZcapitals
brasilia_coords <- st_coordinates(capitals_sf[capitals_sf$NOME == "Brasília", ]) # Coordinates for Brasília

We try to simulate the same Minimum Spanning Tree process between all state capitals and Brasilia

# 3. Generate radial highways (lines connecting Brasília to all other capitals)
radial_lines <- capitals_sf %>%
  filter(NOME != "Brasília") %>%  # Exclude Brasília
  rowwise() %>%
  mutate(
    geometry = st_sfc(
      st_linestring(rbind(brasilia_coords, st_coordinates(geometry)))  # Create a line to Brasília
    )
  ) %>%
  st_as_sf()

# 4. Compute the Minimum Spanning Tree (MST)
# Extract coordinates of all capitals
capital_coords <- st_coordinates(capitals_sf)
distances <- as.matrix(dist(capital_coords))  # Pairwise distances between capitals

# Create graph from distance matrix and calculate MST
graph <- graph_from_adjacency_matrix(distances, mode = "undirected", weighted = TRUE)
mst <- mst(graph)


# Extract MST edges and convert to sf
mst_edges <- as.data.frame(get.edges(mst, seq_len(ecount(mst))))
mst_lines <- do.call(rbind, apply(mst_edges, 1, function(edge) {
  st_sfc(st_linestring(rbind(capital_coords[edge[1], ], capital_coords[edge[2], ])))
}))
mst_sf <- st_as_sf(data.frame(id = 1:nrow(mst_lines), geometry = mst_lines))
mst_sf <- st_set_crs(mst_sf, st_crs(sf.BZ)) 

ggplot()+
  geom_sf(data=sf.BZ)+
  geom_sf_text(data = sf.BZ, aes(label = nome), size = 3, color = "black") +
  geom_sf(data=sf.BZhighways2005, aes(linetype = SITUACAO, color=SITUACAO))+
  geom_sf(data=sf.BZcapitals,color="blue")+
  geom_sf(data = mst_sf, color = "black", linetype = "dashed", size = 0.6)