# install and load packages as needed.
pacman::p_load(sp, raster, leaflet, rgdal,
tidyverse, geosphere, rgeos,
viridis, stats, ggplot2,
kableExtra )
# Data Aquisition:
ETH_Adm_2 <- raster::getData("GADM",
country="ETH",
level =2)
ETH_malaria_data <- read.csv("https://raw.githubusercontent.com/HughSt/HughSt.github.io/master/course_materials/week1/Lab_files/Data/mal_data_eth_2009_no_dups.csv",header=T)
ETH_land_use <- raster("https://github.com/HughSt/HughSt.github.io/blob/master/course_materials/week2/Lab_files/ETH_land_use.tif?raw=true")
# Spatial Formatting:
ETH_malaria_data_SPDF <- SpatialPointsDataFrame(
coords = ETH_malaria_data[,c('longitude','latitude')], data = ETH_malaria_data[,c("examined",
"pf_pos",
"pf_pr")],
proj4string = CRS("+init=epsg:4326"))
# matching CRS.
ETH_malaria_data_SPDF <-
spTransform(ETH_malaria_data_SPDF, crs(ETH_Adm_2))
# Define points over Adm_2 polygons.
ETH_Adm_2_per_point <-
over(ETH_malaria_data_SPDF, ETH_Adm_2)
# sum() examined values at points over each polygon shape.
num_exams_per_Adm2 <- tapply(ETH_malaria_data_SPDF$examined,
ETH_Adm_2_per_point$NAME_2, sum)
# sum() disease positive values at point over each polygon shape.
num_pos_per_Adm2 <-
tapply(ETH_malaria_data_SPDF$pf_pos,
ETH_Adm_2_per_point$NAME_2, sum)
# Divide summed positive values over examination values.
prev_per_Adm2 <- num_pos_per_Adm2 / num_exams_per_Adm2
# create dataframe with prevalence values.
prev_per_Adm2_df <- data.frame(NAME_2 = names(prev_per_Adm2), prevalence = prev_per_Adm2,
row.names = NULL)
# merge prevalence dataframe with ETH_Adm_2 dataframe.
ETH_Adm_2 <- merge(ETH_Adm_2, prev_per_Adm2_df, by = "NAME_2")
# structure data for adm2 level table.
Adm_2_data <- ETH_Adm_2@data
Adm_2_tab <- Adm_2_data %>%
drop_na(prevalence) %>%
mutate(NAME_2 = as.factor(NAME_2)) %>%
group_by(NAME_2) %>%
summarise(prevalence)
# create table
kbl(Adm_2_tab,
col.names = c("Location", "Prevalence")) %>%
kable_styling(bootstrap_options = c("striped"), font_size = 12) %>%
column_spec(2, color = "white",
background = spec_color(Adm_2_tab$prevalence))
| Location | Prevalence |
|---|---|
| Agnuak | 0.0000000 |
| Arsi | 0.0000000 |
| Asosa | 0.0091743 |
| Bale | 0.0010619 |
| Borena | 0.0088224 |
| Debub Mirab Shewa | 0.0093458 |
| Guji | 0.0014144 |
| Horo Guduru | 0.0000000 |
| Ilubabor | 0.0000000 |
| Jimma | 0.0196347 |
| Kelem Wellega | 0.0000000 |
| Mirab Arsi | 0.0000000 |
| Mirab Hararghe | 0.0004114 |
| Mirab Shewa | 0.0046512 |
| Mirab Welega | 0.0004866 |
| Misraq Harerge | 0.0000000 |
| Misraq Shewa | 0.0000000 |
| Misraq Wellega | 0.0000000 |
# define palette and create map.
colorPal <- colorNumeric("viridis",
domain = ETH_Adm_2$prevalence)
Adm_2_prevs_map <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(
data= ETH_Adm_2, col=colorPal(ETH_Adm_2$prevalence),
fillOpacity = .3, weight = .7,
highlightOptions = highlightOptions(color = "black", weight = 3,
bringToFront = TRUE), popup = ETH_Adm_2$NAME_2) %>%
addLegend(pal = colorPal,
values = ETH_Adm_2$prevalence,
title = "Prevalence")
Adm_2_prevs_map
# extract() land use values present at points in malaria_data_SPDF.
ETH_malaria_data_SPDF$land_use <-
raster::extract(ETH_land_use, ETH_malaria_data_SPDF)
# rename values as per GLOBcover2009 convention.
land_cat_prev <- ETH_malaria_data_SPDF@data %>%
mutate(land_use = case_when(
land_use == 20 | land_use == 14 ~ "Cropland",
land_use == 60 | land_use == 40 ~ "Forest",
land_use == 110 ~ "Grassland",
land_use == 130 ~ "Shrubland",
land_use == 30 ~ "Vegetation"
)) %>%
drop_na(land_use) %>%
group_by(land_use) %>%
# summarise to table
summarise(mean_prev = mean(pf_pr, na.rm = FALSE)) %>%
mutate(land_use = fct_reorder(land_use, mean_prev))
# plot mean prevalence by land use type
ggplot(land_cat_prev) +
geom_col(aes(land_use, mean_prev, fill = land_use),
alpha = .6) +
labs(x= "Land Use Types") +
ylab("Test Positive Location, Prevalence > 0") +
ggtitle("Mean Prevalence, Malaria Positive by Land Use Type: 2009") +
theme(legend.position = "none") +
scale_fill_viridis_d()
# worldclim dataset.
climate <- getData("worldclim",var="bio",res=10)
# extract values matching malaria test site locations.
ETH_local_climate <-
raster::extract(climate, ETH_malaria_data_SPDF)
# transform to dataframe.
ETH_local_climate_df <-
as.data.frame(ETH_local_climate)
# select and name variables of interest.
ETH_local_climate_df <-
ETH_local_climate_df %>%
select(bio1, bio12) %>%
rename(annual_mean_temp = bio1,
annual_precip = bio12)
# bind climate data to SPDF object.
land_cat_w_clim <- cbind(ETH_malaria_data_SPDF@data, ETH_local_climate_df)
# structure for plotting.
ETH_climate_prevs <-
land_cat_w_clim %>%
mutate(land_use = factor(land_use)) %>%
mutate(land_use = case_when(
land_use == 20 | land_use == 14 ~ "Cropland",
land_use == 60 | land_use == 40 ~ "Forest",
land_use == 110 ~ "Grassland",
land_use == 130 ~ "Shrubland",
land_use == 30 ~ "Vegetation"
)) %>%
drop_na(land_use) %>%
mutate(land_use = fct_reorder(land_use, pf_pr)) %>%
mutate(annual_precip= as.numeric(annual_precip / 1000)) %>%
mutate(annual_mean_temp = as.numeric(annual_mean_temp / 10)) %>%
filter(pf_pr > 0)
# exploratory pearson cor-coef.
cor_temp <- cor(ETH_climate_prevs$annual_mean_temp,
ETH_climate_prevs$pf_pr)
# scatter plot with smooth line and pasted cor-coef value.
ggplot(ETH_climate_prevs) +
geom_smooth(aes(annual_mean_temp, pf_pr), color = "light blue", size = 1.5, alpha = .5) +
geom_point(aes(annual_mean_temp, pf_pr, color = land_use), alpha = .5, size = 2) +
ylim(-.05, .15) +
scale_color_viridis(discrete=TRUE, "Land Use Type") +
xlab("Average Annual Temperature (Celsius)") +
ylab("Test Positive Location, Prevalence > 0") +
ggtitle(label = "Malaria Prevalence and Average Annual Temperature",
subtitle = paste0("Pearson's Cor = ", round(cor_temp, 3)))
### Malaria test sites and precipitation: ###
# exploratory pearson cor-coef.
cor_precip <- cor(ETH_climate_prevs$annual_precip,
ETH_climate_prevs$pf_pr)
# scatter plot with smooth line and pasted cor-coef value.
ggplot(ETH_climate_prevs) +
geom_smooth(aes(annual_precip, pf_pr), color = "light blue", size = 1.5, alpha = .5) +
geom_point(aes(annual_precip, pf_pr, color = land_use), alpha = .5, size = 2) +
ylim(-.05, .15) +
xlab("Average Annual Precipitation (meters)") +
ylab("Test Positive Location, Prevalence > 0") +
scale_color_viridis(discrete=TRUE, "Land Use") +
ggtitle(label = "Malaria Prevalence and Average Annual Precipitation",
subtitle = paste0("Pearson's Cor = ", round(cor_precip, 3)))
# load African health facility locations data from local source.
af_health_cent <- readxl::read_xlsx("who-cds-gmp-2019-01-eng.xlsx")
# subset health facilities in Ethiopia
ETH_health_cent <- af_health_cent %>%
filter(Country == "Ethiopia") %>%
filter(Lat != "NA" & Long != "NA") %>%
select("Facility type", "Lat", "Long") %>%
rename(fac_type = "Facility type")
# transform to spatial object.
ETH_health_cent_SPDF <- SpatialPointsDataFrame(
coords = ETH_health_cent[,c('Long','Lat')],
data = ETH_health_cent[,'fac_type'],
proj4string = CRS("+init=epsg:4326"))
plot(ETH_health_cent_SPDF)
# generate distance matrix.
dist_matrix <- geosphere::distm(ETH_malaria_data_SPDF,
ETH_health_cent_SPDF)
# apply a min function to each row of malaria site data
ETH_malaria_data_SPDF$dist_to_health_cent <-
apply(dist_matrix, 1, min)
# structure malaria site data to plot.
plot_health_cents_dist <- ETH_malaria_data_SPDF@data %>%
select(pf_pr, dist_to_health_cent, land_use) %>%
mutate(dist_to_health_cent = as.numeric(round(dist_to_health_cent / 1000, 1))) %>%
mutate(land_use = factor(land_use)) %>%
mutate(land_use = case_when(
land_use == 20 | land_use == 14 ~ "Cropland",
land_use == 60 | land_use == 40 ~ "Forest",
land_use == 110 ~ "Grassland",
land_use == 130 ~ "Shrubland",
land_use == 30 ~ "Vegetation"
)) %>%
drop_na(land_use) %>%
mutate(land_use = fct_reorder(land_use, pf_pr)) %>%
filter(pf_pr > 0) %>%
# limit outlier value.
filter(dist_to_health_cent < 15)
# exploratory pearson cor-coef.
cor_dist <- cor(plot_health_cents_dist$dist_to_health_cent,
plot_health_cents_dist$pf_pr)
# scatter plot with smooth line and pasted cor-coef value.
ggplot(plot_health_cents_dist) +
geom_smooth(aes(dist_to_health_cent, pf_pr), color = "light blue", size = 1.5, alpha = .5) +
geom_point(aes(dist_to_health_cent, pf_pr, color = land_use), alpha = .5, size = 2) +
ylim(-.05, .15) +
xlab("Minimum Distance to Health Centre, Clinic, or Hosptial (Km)") +
ylab("Test Positive Location, Prevalence > 0") +
scale_color_viridis(discrete=TRUE, "Land Use Type") +
ggtitle(label = "Malaria Prevalence and Proximity to Health Services",
subtitle = paste0("Pearson's Cor = ", round(cor_dist, 3)))
~fin