Geospatial Data Science. Assignment 1

Setup

For the markdown set up, We’ll put include and echo on TRUE so the code and the output can both be seen. In this assignment we’ll replicate graphs from some research papers

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(include = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
library(sf)

## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE

library(ggplot2)
library(spData)

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

library(geosphere)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
setwd("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/") #Set up the working directory

Irrigation, Damns, Water and Infantas Mortality: Evidence from South Africa (Fig.2 Hydrodamns in South Africa)

For this graph, we gathered all the available data on dams. We wanted to include information about river gradients to make our analysis more detailed, but unfortunately, we couldn’t find any data on river gradients for this country. Without this information, we decided to show the number of dams in each province instead. This way, the map still gives us some insight into the water system in South Africa, even though it’s not exactly what we originally planned.

fig1_data <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/zaf_adm_sadb_ocha_20201109_SHP", 
                     'zaf_admbnda_adm2_sadb_ocha_20201109')

## Reading layer `zaf_admbnda_adm2_sadb_ocha_20201109' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\zaf_adm_sadb_ocha_20201109_SHP' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 52 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 16.45189 ymin: -34.83417 xmax: 32.94498 ymax: -22.12503
## Geodetic CRS:  WGS 84

fig1_damns <- read.csv("globaldamsdatabase_global_coverage_november_2020.csv") %>%
  filter(Country == "South Africa", !(Long__res_ == 0 & Lat__res_ == 0)) 

points_damns <- st_as_sf(fig1_damns, coords = c("Long__res_", "Lat__res_"), crs = 4326) #Covert these column in latitude and longitude son we can work with them with the sf package

dams_sf <- st_as_sf(fig1_damns, coords = c("Long__res_", "Lat__res_"), crs = st_crs(fig1_data))
#Left join so we can use this joined data to graph on later

joined_data <- st_join(dams_sf, fig1_data, join = st_intersects) #join to count the damns per province

dam_count_per_province <- joined_data %>%
  group_by(ADM2_EN) %>%
  summarise(dam_count = n()) 

total_dams <- sum(dam_count_per_province$dam_count)

dam_count_per_province <- dam_count_per_province %>%
  mutate(share_of_dams = (dam_count / total_dams) * 100) 

df <- as.data.frame(dam_count_per_province) 

df <- df %>% 
  select(-geometry)

merged_data <- left_join(fig1_data, df, by = "ADM2_EN")


 fig1_plot <- ggplot() +
  geom_sf(data = merged_data, aes(fill = share_of_dams), color = "black") +
  geom_sf(data = points_damns, color = "black", size = 1, alpha = 0.7) +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Share of Dams (%)") +
  theme_minimal() +
  labs(title = "Share of Dams per Province") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
        legend.position = "right",
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 10),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1))

print(fig1_plot)

Rural electrification, migration and estructural transformation: Evidence from ethiopia (Fig.4 Districts and Electriciry grids in Ethiopia)

For this graph, we collected data on administrative boundaries, locations of generators, electricity grids, and roads in Ethiopia. We managed to find all the necessary data. However, we encountered some difficulty integrating all of them, especially the road data, into our graph. As shown in our code, we processed the road data, but we’re still facing some challenges. The reasearch is primarily focused on electricity grids and generators in our research, and our graph offers detailed information on these aspects.

fig2_data <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/ex2_maps/Ethiopia_AdminBoundaries-shp", 'Ethiopia_AdminBoundaries')

## Reading layer `Ethiopia_AdminBoundaries' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\ex2_maps\Ethiopia_AdminBoundaries-shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 684 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 33.00154 ymin: 3.398548 xmax: 47.95823 ymax: 14.84569
## Geodetic CRS:  WGS 84

fig2_data <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/eth_adm_csa_bofedb_2021_shp", 'eth_admbnda_adm3_csa_bofedb_2021')

## Reading layer `eth_admbnda_adm3_csa_bofedb_2021' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\eth_adm_csa_bofedb_2021_shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1082 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 32.9918 ymin: 3.40667 xmax: 47.98824 ymax: 14.84548
## Geodetic CRS:  WGS 84

fig2_roads <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/ethiopia_roads_network", 'ethiopia_roads_network')

## Reading layer `ethiopia_roads_network' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\ethiopia_roads_network' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 14255 features and 35 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -161811.4 ymin: 385508.7 xmax: 1494458 ymax: 1639839
## Projected CRS: Adindan / UTM zone 37N

fig2_elect <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/ex2_maps/ethiopia-electricity-transmission-network", 'Ethiopia Electricity Transmission Network')

## Reading layer `Ethiopia Electricity Transmission Network' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\ex2_maps\ethiopia-electricity-transmission-network' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 100 features and 8 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 34.5333 ymin: 3.613251 xmax: 43.58116 ymax: 14.27694
## Geodetic CRS:  WGS 84

fig2_gen <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/ex2_maps/Ethiopia_-_generators-shp", 'Ethiopia_-_generators')

## Reading layer `Ethiopia_-_generators' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\ex2_maps\Ethiopia_-_generators-shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 100 features and 15 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 34.26808 ymin: 4.177 xmax: 43.214 ymax: 14.16865
## Geodetic CRS:  WGS 84

fig2_pop <- read_xlsx("~/BSE/BSE/Term 2/Geospatial Data Science/Assignments/ex2_maps/eth_admpop_2023.xlsx", sheet = "ETH_admpop_adm3_2023")

join_data2 <- left_join(fig2_data, fig2_pop,by = c("ADM3_EN" = "admin3Name_en"))

fig2_data <- st_transform(fig2_data, crs = 4326)
fig2_roads <- st_transform(fig2_roads, crs = 4326)
fig2_elect <- st_transform(fig2_elect, crs = 4326)
fig2_gen <- st_transform(fig2_gen, crs = 4326)

fig2_roads <- st_zm(fig2_data)

fig2_gen$type <- 'Generators'
fig2_elect$type <- 'Electricity Grid'


fig2_plot <- ggplot() +
  geom_sf(data = join_data2, aes(fill = Total)) +  
  geom_sf(data = fig2_gen, aes(color = type), size = 2) +  # Use the new "type" column
  geom_sf(data = fig2_elect, aes(color = type), size = 1) +  # Use the new "type" column
  scale_color_manual(values = c("Generators" = "yellow", "Electricity Grid" = "red")) +  
  theme_minimal() +  
  theme(
    panel.grid.major = element_blank(),  # Remove major gridlines
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.border = element_rect(color = "black", fill = NA, size = 1)  # Add black borders
  ) +
  labs(color = "Infrastructure")  

print(fig2_plot)

Migration, Specialization and Trade: Evidence from Brazil’s March to the west (Fig.2 Population in Brazil, Mesa-regions)

For this graph, we used data from the 2010 census. We replicated the graph as it was presented in the original paper. The key difference in our approach was that while the paper’s graph was based on cities, ours was done by states. We determined the population share for each state and added a distinct red border to highlight the selected states in the meso region.

fig3_data <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/BRA_adm", 'BRA_adm1')

## Reading layer `BRA_adm1' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\BRA_adm' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.98971 ymin: -33.74708 xmax: -28.84694 ymax: 5.264878
## Geodetic CRS:  WGS 84

fig3_pop <- read_xlsx("population.xlsx")
fig3_pop <- fig3_pop[2:ncol(fig3_pop)]

fig3_data <- st_transform(fig3_data, crs = 4326)

brazil <- world %>% 
  filter(name_long == "Brazil")

merged_data <- left_join(fig3_data, fig3_pop, by = c("NAME_1" = "Brasil e Unidade da Federação"))

merged_data <- merged_data %>% 
  select("NAME_1","geometry","Pop2010") %>% 
  mutate(share = Pop2010/sum(Pop2010))

# List of states to highlight
states_to_highlight <- c("Amazonas", "Acre", "Roraima", "Amapá", 
                         "Pará", "Rondônia", "Mato Grosso", 
                         "Mato Grosso do Sul", "Goiás","Tocantins","Maranhão")

# Create a subset of data for these states
highlight_states <- merged_data[merged_data$NAME_1 %in% states_to_highlight, ]

# Merge the geometries of these states into a single shape
merged_highlight <- st_union(highlight_states)


fig3_plot <- ggplot() +
  geom_sf(data = merged_data, fill = "white", color = "black") +
  geom_sf(data = merged_data, aes(fill = share)) +
  theme(panel.background = element_rect(fill = "white"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),  
        axis.text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
        legend.position = "bottom",
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 10),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),) +
  scale_fill_continuous(name = "Share of Population", low = "lightblue", high = "darkblue") +
  theme_minimal()+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())  # Remove gridlines

# Add the merged highlight layer
fig3_plot <- fig3_plot +
  geom_sf(data = merged_highlight, fill = NA, color = "red", linewidth = 1.5) 

# Print the plot
print(fig3_plot)

In harm’s way ? Infraestructure Investment and the presistence of coastal citites (Fig. 3: Vietnam’s Road Infraestrcture by Road type)

For this graph, we collected road data that was more detailed than we needed. To fix this, we made a new column in our data with simpler categories for the types of roads. We came up with these categories ourselves, so they might not match exactly what the original study used. This was the biggest challenge we faced with this graph. We wanted to make the data easier to understand and work with, but in doing that, there’s a chance we might have made some small mistakes or changed things a bit from the original study.

fig4_data <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/vnm_rdsl_2015_osm", 'vnm_rdsl_2015_OSM')

## Reading layer `vnm_rdsl_2015_OSM' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\vnm_rdsl_2015_osm' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 118138 features and 5 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 102.1562 ymin: 8.596338 xmax: 109.4536 ymax: 23.37756
## Geodetic CRS:  WGS 84

fig4_data <- st_transform(fig4_data, crs = 4326)

vietnam <- world %>% 
  filter(name_long == "Vietnam")

type_summary <- c(
  "footway" = "Others", "residential" = "Minor Roads", 
  "primary" = "Major Roads", "secondary" = "Major Roads",
  "pedestrian" = "Others", "tertiary" = "Minor Roads", 
  "trunk" = "Dual carriageway", "unclassified" = "Others",
  "service" = "Others", "motorway_link" = "Freeway", 
  "motorway" = "Freeway", "primary_link" = "Major Roads",
  "track" = "Others", "tertiary_link" = "Minor Roads", 
  "road" = "Others", "trunk_link" = "Dual carriageway",
  "secondary_link" = "Major Roads", "living_street" = "Others",
  "steps" = "Others", "path" = "Others", "construction" = "Others",
  "cycleway" = "Others", "proposed" = "Others", "crossing" = "Others",
  "services" = "Others", "rest_area" = "Others", "yes" = "Others"
)

fig4_data <- fig4_data %>%
  mutate(type2 = type_summary[type])

my_colors <- c("Dual carriageway" = "darkgreen", 
               "Freeway" = "blue", 
               "Major Roads" = "red",
               "Minor Roads" = "orange",
               "Others" = "yellow")

fig4_plot <- ggplot() +
  geom_sf(data = vietnam, color = "black", size = 0.5, fill = "white") +  
  geom_sf(data = fig4_data, aes(color = type2), size = 0.1) +  
  theme_minimal() +
  labs(color = "Type of Infrastructure") +
  scale_color_manual(values = my_colors) +
  theme(panel.background = element_rect(fill = "white"),
        panel.border = element_rect(color = "black", fill = NA, size = 1),  # Add a black border
        axis.text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
        legend.position = "right",
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 10),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

# Print the plot
print(fig4_plot)

The effects of roads on trade and migration: evidence from a plant capital city (Fig.1 Brasil’s Capital and Main roads Infrasestructure)

For this graph, we first needed to get a map file that shows all of Brazil’s highways. We then sorted these highways into two groups: ones that go towards Brasilia, the capital, called radial highways, and the rest. The cool part is that you can tell if a highway is radial just by its code – if it starts with a zero, it’s radial; if not, it’s not.

Next, we needed to mark where each state is on the map. So, we made a list with the names of all the states and their locations.

The last thing we did was figure out the shortest way to connect all the cities without any loops, which is called a minimum spanning tree. We found a tool that helped us do this, and then we drew these connecting lines on the map between the cities.

library(igraph)  

sf.provinces <- st_read("C:/Users/enman/Documents/BSE/BSE/Term 2/Geospatial Data Science/Assignments/br-federal-highways-2020", 'SNV_202001A')

## Reading layer `SNV_202001A' from data source 
##   `C:\Users\enman\Documents\BSE\BSE\Term 2\Geospatial Data Science\Assignments\br-federal-highways-2020' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 7178 features and 28 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -73.70507 ymin: -33.69308 xmax: -32.40022 ymax: 4.483634
## Geodetic CRS:  SIRGAS 2000

highways <- st_transform(sf.provinces, crs = 4326)

highways$is_radial <- substr(highways$vl_br, 1, 1) == "0"

brazil <- world %>% 
  filter(name_long == "Brazil")

capitals <- data.frame(
  city = c("Rio Branco", "Maceió", "Macapá", "Manaus", "Salvador",
           "Fortaleza", "Brasília", "Vitória", "Goiânia", "São Luís",
           "Cuiabá", "Campo Grande", "Belo Horizonte", "Belém",
           "João Pessoa", "Curitiba", "Recife", "Teresina",
           "Rio de Janeiro", "Natal", "Porto Alegre", "Porto Velho",
           "Boa Vista", "Florianópolis", "São Paulo", "Aracaju", "Palmas"),
  lon = c(-67.8243, -35.7089, -51.0664, -60.0217, -38.5014,
          -38.5433, -47.9297, -40.3128, -49.2646, -44.3028,
          -56.0974, -54.6464, -43.9378, -48.4898, -34.845,
          -49.2731, -34.877, -42.8019, -43.1729, -35.2094,
          -51.2287, -63.9004, -60.6733, -48.548, -46.6333, -37.0731, -48.3336),
  lat = c(-9.97499, -9.64985, 0.03889, -3.11903, -12.9714,
          -3.71722, -15.7797, -20.3155, -16.6864, -2.52972,
          -15.601, -20.4428, -19.9208, -1.45502, -7.11949,
          -25.4278, -8.04756, -5.08917, -22.9068, -5.795,
          -30.0277, -8.76116, 2.81972, -27.5954, -23.5505, -10.9472, -10.1844)
)



capitals_sf <- st_as_sf(capitals, coords = c("lon", "lat"), crs = 4326)

dist_matrix <- st_distance(capitals_sf)

# Convert the distance matrix to a graph
graph <- graph_from_adjacency_matrix(dist_matrix, mode = "undirected", weighted = TRUE, diag = FALSE)

# Compute the MST
mst <- mst(graph)

# Convert the MST to a line object for plotting
edge_list <- get.edgelist(mst)
lines <- vector("list", length = nrow(edge_list))
for (i in seq_len(nrow(edge_list))) {
  lines[[i]] <- st_linestring(rbind(st_coordinates(capitals_sf[edge_list[i, 1], ]),
                                    st_coordinates(capitals_sf[edge_list[i, 2], ])))
}
mst_sf <- st_sfc(lines, crs = st_crs(capitals_sf))


legend_data <- data.frame(x = 1, y = 1, linetype = c("Radial Highway", "Non-Radial Highway", "MST"))

highway_plot <- ggplot() +
  geom_sf(data = brazil, fill = "white", color = "black") +
  geom_sf(data = highways, aes(linetype = ifelse(is_radial, "Radial Highway", "Non-Radial Highway")), color = "grey") +
  geom_sf(data = mst_sf, color = "red", size = 2, linetype = "dashed") + # MST as dashed lines
  geom_segment(data = legend_data, aes(x = x, y = y, xend = x, yend = y, linetype = linetype, color = linetype), show.legend = TRUE) + # Dummy lines for legend
  geom_point(data = capitals, aes(x = lon, y = lat), color = "black", size = 2) +
  geom_text(data = capitals, aes(x = lon, y = lat, label = city), vjust = "top", hjust = "right") +
  coord_sf(xlim = c(-74, -34), ylim = c(-34, 6), expand = FALSE) +
  theme_minimal() +
  labs(title = "Brazilian State Capitals and Minimum Spanning Tree", linetype = "Type of Line") +
  scale_linetype_manual(
    values = c("Radial Highway" = "solid", "Non-Radial Highway" = "dotted", "MST" = "dashed")
  ) +
  scale_color_manual(
    values = c("Radial Highway" = "black", "Non-Radial Highway" = "grey", "MST" = "red"),
    guide = "none"
  ) +
  theme(panel.background = element_rect(fill = "white"),
        panel.border = element_rect(color = "black", fill = NA, size = 1))

print(highway_plot)