library(rnaturalearth)
library(ggplot2)
library(geosphere)
library(dplyr)
library(spData)
library(sf)
library(readxl)
library(gridExtra)
plot(ne_countries())
world <- ne_countries(scale = "large", returnclass = "sf")
# Check available column names and see all info variables available
colnames(world)
unique(world$continent)
##According to the READ_ME file on Github, you can import data with the following codes:
ports <- ne_download(
scale = 10,
type = "ports",
category = "cultural",
returnclass = "sf"
)
airports <- ne_download(
scale = 10,
type = "airports",
category = "cultural",
returnclass = "sf"
)
# Plot world map with population data
ggplot(data = world) +
geom_sf(aes(fill = pop_est)) +
scale_fill_viridis_c(trans = "log", name = "Population") +
theme_minimal() +
labs(title = "Total Population by Country",
caption = "Source: Natural Earth")
# Histogram: country population by continent
ggplot(world, aes(x = pop_est)) +
geom_histogram(fill = "blue", alpha = 0.7) +
facet_wrap(~continent) +
scale_x_log10() +
theme_minimal() +
labs(title = "Country Population by Continent",
x = "Population (log scale)",
y = "Frequency")
# Compute centroids of each country as baseline for "location" point
world$geometry <- st_make_valid(world$geometry)
world$centroid <- st_centroid(world$geometry)
# Function to calculate the average distance from a set of locations to the nearest point in another set
average_nearest_distance <- function(points, targets) {
distances <- apply(st_coordinates(points), 1, function(pt) {
min(distGeo(pt, st_coordinates(targets)))
})
return(mean(distances, na.rm = TRUE) / 1000) # Convert to km
}
# Compute average distances (airports and ports)
world$avg_dist_port <- sapply(1:nrow(world), function(i) {
average_nearest_distance(world$centroid[i], ports)
})
world$avg_dist_airport <- sapply(1:nrow(world), function(i) {
average_nearest_distance(world$centroid[i], airports)
})
# Histogram of distances by continent (airports)
ggplot(world, aes(x = avg_dist_airport)) +
geom_histogram(fill = "red") +
facet_wrap(~continent) +
theme_minimal() +
labs(title = "Histogram of Country-Level Average Distance to Airports",
x = "Distance (km)",
y = "Count")
# Histogram of distances by continent (ports)
ggplot(world, aes(x = avg_dist_port)) +
geom_histogram(fill = "green") +
facet_wrap(~continent) +
theme_minimal() +
labs(title = "Histogram of Country-Level Average Distance to Ports",
x = "Distance (km)",
y = "Count")
You can also embed plots, for example:
setwd("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment II")
Market_locations <- st_read("data/1.-Price--Production--and-Population-Data/Catchments/MktCatch6Plus5_Dissolve.shp")
plot(Market_locations)
market_centroids <- read_excel("data/1.-Price--Production--and-Population-Data/MktCoords.xlsx")
market_centroids_sf <- market_centroids %>%
st_as_sf(coords = c("longitude", "latitude"), crs=4326)
plot(market_centroids_sf)
African_countries <- world%>%
filter(world$continent=="Africa")
st_crs(market_centroids_sf)
st_crs(African_countries)
st_crs(ports)
st_is_valid(African_countries)
# Fix invalid geometries
African_countries <- st_make_valid(African_countries)
# Ensure ports and African_countries have the same CRS
ports <- st_transform(ports, st_crs(African_countries))
# Get vector for points inside Africa
ports_in_africa <- st_filter(ports, African_countries)
airports_in_africa <- st_filter(airports, African_countries)
# Plot Africa and the filtered ports
ggplot() +
geom_sf(data = African_countries, aes(fill = name), alpha = 0.4) +
geom_sf(data = ports_in_africa, color = "black", size = 1)+
theme_minimal() +
theme(legend.position = "none") +
labs(title = "Ports Located in Africa", subtitle = "Filtered using spatial intersection")
ggplot() +
geom_sf(data = African_countries, aes(fill = name), alpha = 0.4) +
geom_sf(data = airports_in_africa, color = "darkgray", size = 1)+
theme_minimal() +
theme(legend.position = "none") +
labs(title = "Airports Located in Africa", subtitle = "Filtered using spatial intersection")
roads <- ne_download(
scale = 10,
type = "roads",
category = "cultural",
returnclass = "sf"
)
coastline <- ne_download(
scale = 10,
type = "coastline",
category = "physical",
returnclass = "sf"
)
roads_in_africa <- st_filter(roads, African_countries)
coastline_africa <- st_filter(coastline, African_countries)
#FIRST PLOT
ggplot() +
geom_sf(data = African_countries, aes(fill = name), alpha = 0.4) +
geom_sf(data = roads_in_africa, color = "gray", size = 1)+
geom_sf(data = airports_in_africa, color = "darkblue", size = 1)+
geom_sf(data = market_centroids_sf, color = "darkred", size = 1)+
geom_sf(data = coastline_africa, color = "cyan", size = 0.5)+
theme_minimal() +
theme(legend.position = "none") +
labs(title = "Markets, Airports, Coastline and Roads in Africa")
market_centroids_sf$distance_airport<- apply(st_distance(market_centroids_sf, airports_in_africa), 1, min)
market_centroids_sf$distance_port<- apply(st_distance(market_centroids_sf, ports_in_africa), 1, min)
market_centroids_sf$distance_coast<- apply(st_distance(market_centroids_sf, coastline_africa), 1, min)
market_centroids_sf$distance_road<- apply(st_distance(market_centroids_sf, roads_in_africa), 1, min)
market_prices <- read_excel("C:/Users/Juan Acuña/Desktop/BSE/T2/Geospatial Data Science/Assignment II/data/1.-Price--Production--and-Population-Data/PriceMaster4GAMS.xlsx")
# Convert 0s to NA
market_prices[market_prices == 0] <- NA
# Convert 0s to NA in numeric columns only
numeric_cols <- names(market_prices)[sapply(market_prices, is.numeric)]
market_prices[numeric_cols] <- lapply(market_prices[numeric_cols], function(x) ifelse(x == 0, NA, x))
#Delete Gulf and Bangkok that are not present in geospatial data
market_prices <- market_prices[market_prices$market != "Bangkok", ]
market_prices <- market_prices[market_prices$market != "Gulf", ]
unique(market_centroids$market)
unique(market_prices$market)
# Compute the average price for each row (excluding non-numeric columns)
market_prices <- market_prices %>%
mutate(avg_price = rowMeans(select(., `1`:`144`), na.rm = TRUE)) # Adjust range as needed
market_prices <- market_prices[, -c(5:148)]
market_prices <- market_prices %>%
group_by(mktcode, country, market) %>%
summarise(avg_price = mean(avg_price, na.rm = TRUE), .groups = "drop")
market_prices <- left_join(market_centroids_sf, market_prices, by = c("mktcode", "market"))
# Scatter plot: Avg price vs Distance to Airport
p1 <- ggplot(market_prices, aes(x = distance_airport, y = avg_price, label = market)) +
geom_point(color = "blue") +
geom_text(vjust = -0.5, size = 3) + # Adds market name above points
labs(title = "Average Price vs Distance to Airport",
x = "Distance to Airport (km)",
y = "Average Price ($)") +
theme_minimal()
# Scatter plot: Avg price vs Distance to Port
p2 <- ggplot(market_prices, aes(x = distance_port, y = avg_price, label = market)) +
geom_point(color = "red") +
geom_text(vjust = -0.5, size = 3) +
labs(title = "Average Price vs Distance to Port",
x = "Distance to Port (km)",
y = "Average Price ($)") +
theme_minimal()
# Scatter plot: Avg price vs Distance to Road
p3 <- ggplot(market_prices, aes(x = distance_road, y = avg_price, label = market)) +
geom_point(color = "green") +
geom_text(vjust = -0.5, size = 3) +
labs(title = "Average Price vs Distance to Road",
x = "Distance to Road (km)",
y = "Average Price ($)") +
theme_minimal()
grid.arrange(p1, p2, p3, ncol = 3)