Part I

library(rnaturalearth)
library(ggplot2)
library(geosphere) 
library(dplyr)
library(spData)
library(sf)
library(readxl)
library(gridExtra)

plot(ne_countries())

world <- ne_countries(scale = "large", returnclass = "sf")

# Check available column names and see all info variables available
colnames(world)

unique(world$continent)

##According to the READ_ME file on Github, you can import data with the following codes:

ports <- ne_download(
  scale = 10,
  type = "ports",
  category = "cultural",
  returnclass = "sf"
)

airports <- ne_download(
  scale = 10,
  type = "airports",
  category = "cultural",
  returnclass = "sf"
)

# Plot world map with population data
ggplot(data = world) +
  geom_sf(aes(fill = pop_est)) +
  scale_fill_viridis_c(trans = "log", name = "Population") +
  theme_minimal() +
  labs(title = "Total Population by Country",
       caption = "Source: Natural Earth")

# Histogram: country population by continent
ggplot(world, aes(x = pop_est)) +
  geom_histogram(fill = "blue", alpha = 0.7) +
  facet_wrap(~continent) +
  scale_x_log10() +
  theme_minimal() +
  labs(title = "Country Population by Continent",
       x = "Population (log scale)",
       y = "Frequency")

# Compute centroids of each country as baseline for "location" point

world$geometry <- st_make_valid(world$geometry)
world$centroid <- st_centroid(world$geometry)

# Function to calculate the average distance from a set of locations to the nearest point in another set
average_nearest_distance <- function(points, targets) {
  distances <- apply(st_coordinates(points), 1, function(pt) {
    min(distGeo(pt, st_coordinates(targets)))
  })
  return(mean(distances, na.rm = TRUE) / 1000)  # Convert to km
}

# Compute average distances (airports and ports)
world$avg_dist_port <- sapply(1:nrow(world), function(i) {
  average_nearest_distance(world$centroid[i], ports)
})

world$avg_dist_airport <- sapply(1:nrow(world), function(i) {
  average_nearest_distance(world$centroid[i], airports)
})


# Histogram of distances by continent (airports)
ggplot(world, aes(x = avg_dist_airport)) +
  geom_histogram(fill = "red") +
  facet_wrap(~continent) +
  theme_minimal() +
  labs(title = "Histogram of Country-Level Average Distance to Airports",
       x = "Distance (km)",
       y = "Count")

# Histogram of distances by continent (ports)
ggplot(world, aes(x = avg_dist_port)) +
  geom_histogram(fill = "green") +
  facet_wrap(~continent) +
  theme_minimal() +
  labs(title = "Histogram of Country-Level Average Distance to Ports",
       x = "Distance (km)",
       y = "Count")

PART II

You can also embed plots, for example:

setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment II")

Market_locations <- st_read("data/1.-Price--Production--and-Population-Data/Catchments/MktCatch6Plus5_Dissolve.shp")

plot(Market_locations)

market_centroids <- read_excel("data/1.-Price--Production--and-Population-Data/MktCoords.xlsx")

market_centroids_sf <- market_centroids %>%
  st_as_sf(coords = c("longitude", "latitude"), crs=4326)
  
plot(market_centroids_sf)

African_countries <- world%>%
  filter(world$continent=="Africa")

st_crs(market_centroids_sf)
st_crs(African_countries)
st_crs(ports)

st_is_valid(African_countries)

# Fix invalid geometries
African_countries <- st_make_valid(African_countries)

# Ensure ports and African_countries have the same CRS
ports <- st_transform(ports, st_crs(African_countries))

# Get vector for points inside Africa
ports_in_africa <- st_filter(ports, African_countries)
airports_in_africa <- st_filter(airports, African_countries)



# Plot Africa and the filtered ports
ggplot() +
  geom_sf(data = African_countries, aes(fill = name), alpha = 0.4) +
  geom_sf(data = ports_in_africa, color = "black", size = 1)+
  theme_minimal() +
  theme(legend.position = "none") +
  labs(title = "Ports Located in Africa", subtitle = "Filtered using spatial intersection")

ggplot() +
  geom_sf(data = African_countries, aes(fill = name), alpha = 0.4) +
  geom_sf(data = airports_in_africa, color = "darkgray", size = 1)+
  theme_minimal() +
  theme(legend.position = "none") +
  labs(title = "Airports Located in Africa", subtitle = "Filtered using spatial intersection")

roads <- ne_download(
  scale = 10,
  type = "roads",
  category = "cultural",
  returnclass = "sf"
)

coastline <- ne_download(
  scale = 10,
  type = "coastline",
  category = "physical",
  returnclass = "sf"
)


roads_in_africa <- st_filter(roads, African_countries)
coastline_africa <- st_filter(coastline, African_countries)

#FIRST PLOT
ggplot() +
  geom_sf(data = African_countries, aes(fill = name), alpha = 0.4) +
  geom_sf(data = roads_in_africa, color = "gray", size = 1)+
  geom_sf(data = airports_in_africa, color = "darkblue", size = 1)+
  geom_sf(data = market_centroids_sf, color = "darkred", size = 1)+
  geom_sf(data = coastline_africa, color = "cyan", size = 0.5)+
  theme_minimal() +
  theme(legend.position = "none") +
  labs(title = "Markets, Airports, Coastline and Roads in Africa")

market_centroids_sf$distance_airport<- apply(st_distance(market_centroids_sf, airports_in_africa), 1, min)
market_centroids_sf$distance_port<- apply(st_distance(market_centroids_sf, ports_in_africa), 1, min)
market_centroids_sf$distance_coast<- apply(st_distance(market_centroids_sf, coastline_africa), 1, min)
market_centroids_sf$distance_road<- apply(st_distance(market_centroids_sf, roads_in_africa), 1, min)

market_prices <- read_excel("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment II/data/1.-Price--Production--and-Population-Data/PriceMaster4GAMS.xlsx")

# Convert 0s to NA
market_prices[market_prices == 0] <- NA

# Convert 0s to NA in numeric columns only
numeric_cols <- names(market_prices)[sapply(market_prices, is.numeric)]
market_prices[numeric_cols] <- lapply(market_prices[numeric_cols], function(x) ifelse(x == 0, NA, x))


#Delete Gulf and Bangkok that are not present in geospatial data
market_prices <- market_prices[market_prices$market != "Bangkok", ]
market_prices <- market_prices[market_prices$market != "Gulf", ]

unique(market_centroids$market)

unique(market_prices$market)


# Compute the average price for each row (excluding non-numeric columns)
market_prices <- market_prices %>%
  mutate(avg_price = rowMeans(select(., `1`:`144`), na.rm = TRUE))  # Adjust range as needed

market_prices <- market_prices[, -c(5:148)]

market_prices <- market_prices %>%
  group_by(mktcode, country, market) %>%
  summarise(avg_price = mean(avg_price, na.rm = TRUE), .groups = "drop")

market_prices <- left_join(market_centroids_sf, market_prices, by = c("mktcode", "market"))

# Scatter plot: Avg price vs Distance to Airport
p1 <- ggplot(market_prices, aes(x = distance_airport, y = avg_price, label = market)) +
  geom_point(color = "blue") +
  geom_text(vjust = -0.5, size = 3) +  # Adds market name above points
  labs(title = "Average Price vs Distance to Airport",
       x = "Distance to Airport (km)", 
       y = "Average Price ($)") +
  theme_minimal()

# Scatter plot: Avg price vs Distance to Port
p2 <- ggplot(market_prices, aes(x = distance_port, y = avg_price, label = market)) +
  geom_point(color = "red") +
  geom_text(vjust = -0.5, size = 3) +
  labs(title = "Average Price vs Distance to Port",
       x = "Distance to Port (km)", 
       y = "Average Price ($)") +
  theme_minimal()

# Scatter plot: Avg price vs Distance to Road
p3 <- ggplot(market_prices, aes(x = distance_road, y = avg_price, label = market)) +
  geom_point(color = "green") +
  geom_text(vjust = -0.5, size = 3) +
  labs(title = "Average Price vs Distance to Road",
       x = "Distance to Road (km)", 
       y = "Average Price ($)") +
  theme_minimal()

grid.arrange(p1, p2, p3, ncol = 3)