library(sf)
library(tmap)
library(geodaData)ohio_lung <- geodaData::ohio_lungWe start our discussion of rate maps by illustrating something we should not be doing. This pertains to the important difference between a spatially extensive and a spatially intensive variable. In many applications that use public health data, we typically have access to: 1) a count of events, such as the number of cancer cases (a spatially extensive variable), as well as 2) to the relevant population at risk, which allows for the calculation of a rate (a spatially intensive variable).
get.var <- function(vname,df) {
# function to extract a variable as a vector out of an sf data frame
# arguments:
# vname: variable name (as character, in quotes)
# df: name of sf data frame
# returns:
# v: vector with values (without a column name)
v <- df[vname] %>% st_set_geometry(NULL)
v <- unname(v[,1])
return(v)
}
boxbreaks <- function(v,mult=1.5) {
# break points for box map
# arguments:
# v: vector with observations
# mult: multiplier for IQR (default 1.5)
# returns:
# bb: vector with 7 break points
# compute quartile and fences
qv <- unname(quantile(v))
iqr <- qv[4] - qv[2]
upfence <- qv[4] + mult * iqr
lofence <- qv[2] - mult * iqr
# initialize break points vector
bb <- vector(mode="numeric",length=7)
# logic for lower and upper fences
if (lofence < qv[1]) { # no lower outliers
bb[1] <- lofence
bb[2] <- qv[1]
} else {
bb[2] <- lofence
bb[1] <- qv[1]
}
if (upfence > qv[5]) { # no upper outliers
bb[7] <- upfence
bb[6] <- qv[5]
} else {
bb[6] <- upfence
bb[7] <- qv[5]
}
bb[3:5] <- qv[2:4]
return(bb)
}boxmap <- function(vnam,df,legtitle=NA,mtitle="Box Map",mult=1.5){
# box map
# arguments:
# vnam: variable name (as character, in quotes)
# df: simple features polygon layer
# legtitle: legend title
# mtitle: map title
# mult: multiplier for IQR
# returns:
# a tmap-element (plots a map)
var <- get.var(vnam,df)
bb <- boxbreaks(var)
tm_shape(df) +
tm_fill(vnam,title=legtitle,breaks=bb,palette="-RdBu",
labels = c("lower outlier", "< 25%", "25% - 50%", "50% - 75%","> 75%", "upper outlier")) +
tm_borders() +
tm_layout(title = mtitle, title.position = c("right","bottom"),legend.outside = TRUE, legend.outside.position = "right")
}tmap_mode("view")Boxmap
boxmap("LFW68", ohio_lung) +
tm_basemap(server = "OpenStreetMap", alpha = 0.5)Now we make a map of the lung cancer counts (spatially extensive) and the raw population based rates (spatially intensive).
ohio_lung$raw_rate <- ohio_lung$LFW68 / ohio_lung$POPFW68tmap_mode("plot")p1 <- boxmap("LFW68", ohio_lung)
p2 <- boxmap("raw_rate", ohio_lung)
p2tmap_arrange(p1, p2, ncol = 2)sum_observed <- sum(ohio_lung$LFW68)
sum_population <- sum(ohio_lung$POPFW68)
p_i <- sum_observed / sum_populationNext we calculate the expected number of events for each county based on the reference rate for the whole state and the population for each county.
E_i <- p_i * ohio_lung$POPFW68Lasty, we divide the actual count by the expected count to get the relative risk rate.
ohio_lung$smr <- ohio_lung$LFW68 / E_iThis ratio will show us which counties have a higher than expected number of lung cancer cases, and which counties have a lower than expected count.
In the excess risk map, blue counties will indicate a risk lower than the state average, or SMRi<1. Red counties indicate a risk higher than the state average, or SMRi>1
p1 <- tm_shape(ohio_lung) +
tm_fill("smr",title="Excess risk",breaks=c(-100,.25,.5,1,2,4,1000),labels = c("<.25", ".25 - .50", ".50 - 1.00","1.00 - 2.00", "2.00 - 4.00", "> 4.00" ), palette = "-RdBu") +
tm_borders() +
tm_layout(legend.outside = TRUE, legend.outside.position = "right")
p1Additionally, we can examine the excess risk rate in the form of a boxmap. The boxmap utilizes the full distribution of the rates to identify outliers, compared to the relative risk map, which identifies them as having a value greater than two.
p2 <- boxmap("smr",ohio_lung)
p2tmap_arrange(p1,p2,ncol = 2)