1 Preliminaries

1.1 Loading packages

library(sf)
library(tmap)
library(geodaData)

1.2 geodaData

ohio_lung <- geodaData::ohio_lung

2 Choropleth Map for Rates

2.1 Spatially extensive and intensive variables

We start our discussion of rate maps by illustrating something we should not be doing. This pertains to the important difference between a spatially extensive and a spatially intensive variable. In many applications that use public health data, we typically have access to: 1) a count of events, such as the number of cancer cases (a spatially extensive variable), as well as 2) to the relevant population at risk, which allows for the calculation of a rate (a spatially intensive variable).

2.1.1 Setting up the Boxmap option

get.var <- function(vname,df) {
  # function to extract a variable as a vector out of an sf data frame
  # arguments:
  #    vname: variable name (as character, in quotes)
  #    df: name of sf data frame
  # returns:
  #    v: vector with values (without a column name)
  v <- df[vname] %>% st_set_geometry(NULL)
  v <- unname(v[,1])
  return(v)
}

boxbreaks <- function(v,mult=1.5) {
  # break points for box map
  # arguments:
  #   v: vector with observations
  #   mult: multiplier for IQR (default 1.5)
  # returns:
  #   bb: vector with 7 break points
  # compute quartile and fences
  qv <- unname(quantile(v))
  iqr <- qv[4] - qv[2]
  upfence <- qv[4] + mult * iqr
  lofence <- qv[2] - mult * iqr
  # initialize break points vector
  bb <- vector(mode="numeric",length=7)
  # logic for lower and upper fences
  if (lofence < qv[1]) {  # no lower outliers
    bb[1] <- lofence
    bb[2] <- qv[1]
  } else {
    bb[2] <- lofence
    bb[1] <- qv[1]
  }
  if (upfence > qv[5]) { # no upper outliers
    bb[7] <- upfence
    bb[6] <- qv[5]
  } else {
    bb[6] <- upfence
    bb[7] <- qv[5]
  }
  bb[3:5] <- qv[2:4]
  return(bb)
}
boxmap <- function(vnam,df,legtitle=NA,mtitle="Box Map",mult=1.5){
  # box map
  # arguments:
  #   vnam: variable name (as character, in quotes)
  #   df: simple features polygon layer
  #   legtitle: legend title
  #   mtitle: map title
  #   mult: multiplier for IQR
  # returns:
  #   a tmap-element (plots a map)
  var <- get.var(vnam,df)
  bb <- boxbreaks(var)
  tm_shape(df) +
     tm_fill(vnam,title=legtitle,breaks=bb,palette="-RdBu",
          labels = c("lower outlier", "< 25%", "25% - 50%", "50% - 75%","> 75%", "upper outlier"))  +
  tm_borders() +
  tm_layout(title = mtitle, title.position = c("right","bottom"),legend.outside = TRUE, legend.outside.position = "right")
}
tmap_mode("view")

Boxmap

boxmap("LFW68", ohio_lung) +
  tm_basemap(server = "OpenStreetMap", alpha = 0.5)

2.2 Raw rate map

Now we make a map of the lung cancer counts (spatially extensive) and the raw population based rates (spatially intensive).

ohio_lung$raw_rate <- ohio_lung$LFW68 / ohio_lung$POPFW68
tmap_mode("plot")
p1 <- boxmap("LFW68", ohio_lung)
p2 <- boxmap("raw_rate", ohio_lung)
p2

tmap_arrange(p1, p2, ncol = 2)

3 Excess risk

3.1 Relative risk

sum_observed <- sum(ohio_lung$LFW68)
sum_population <- sum(ohio_lung$POPFW68)
p_i <- sum_observed / sum_population

Next we calculate the expected number of events for each county based on the reference rate for the whole state and the population for each county.

E_i <- p_i * ohio_lung$POPFW68

Lasty, we divide the actual count by the expected count to get the relative risk rate.

ohio_lung$smr <- ohio_lung$LFW68 / E_i

This ratio will show us which counties have a higher than expected number of lung cancer cases, and which counties have a lower than expected count.

3.2 Excess risk map

In the excess risk map, blue counties will indicate a risk lower than the state average, or SMRi<1. Red counties indicate a risk higher than the state average, or SMRi>1

p1 <- tm_shape(ohio_lung) +
     tm_fill("smr",title="Excess risk",breaks=c(-100,.25,.5,1,2,4,1000),labels = c("<.25", ".25 - .50", ".50 - 1.00","1.00 - 2.00", "2.00 - 4.00", "> 4.00" ), palette = "-RdBu")  +
  tm_borders() +
  tm_layout(legend.outside = TRUE, legend.outside.position = "right")
p1

Additionally, we can examine the excess risk rate in the form of a boxmap. The boxmap utilizes the full distribution of the rates to identify outliers, compared to the relative risk map, which identifies them as having a value greater than two.

p2 <- boxmap("smr",ohio_lung)
p2

tmap_arrange(p1,p2,ncol = 2)

4 Reference

  1. Rate Map