Read in the raw data

# Switch the working directory so that the relative workbook file names passed
# to read_excel() below are resolved against this folder.
# NOTE(review): this is an absolute, user-specific Windows path; setwd() will
# raise an error on any machine where the folder does not exist. Consider
# project-relative paths (e.g. file.path() or an RStudio project) instead.
setwd("C:\\Users\\sfraley\\Downloads\\PCV")

# ZIP-level reference table, taken from the second sheet of the rural/urban
# workbook. The first column is read as text and the remaining five as
# numeric; the `Zip Code` column is then renamed to `Zip`.
areas <- readxl::read_excel(
  path = "Zip Codes_Rural and Urban_Reference.xlsx",
  sheet = 2,
  col_types = c("text", rep("numeric", 5))
) %>%
  rename(Zip = `Zip Code`)

# Employee wage records, taken from the first sheet of the wage workbook.
# Columns are read as text, numeric, text, text (in that order); the
# `Zip Code of Residence` column is then renamed to `Zip`.
wage <- readxl::read_excel(
  path = "Employee Wage and Zip Code Data.xlsx",
  sheet = 1,
  col_types = c("text", "numeric", "text", "text")
) %>%
  rename(Zip = `Zip Code of Residence`)

----

Create a “rural.flag” variable that marks zip codes whose population is more than 50% rural, then join the zip code characteristics to the wage data.

# Add `rural.flag`: 1 when `% Rural` is greater than 0.5, 0 otherwise
# (rows where `% Rural` is NA get NA).
# NOTE(review): the 0.5 cut-off assumes `% Rural` is stored as a fraction in
# [0, 1] rather than as a percentage in [0, 100] -- confirm against the data.
areas <- mutate(
  areas,
  rural.flag = ifelse(test = `% Rural` > 0.5, yes = 1, no = 0)
)

# Attach the ZIP-level columns from `areas` to every wage record by matching
# on `Zip`, add a constant `count` column equal to 1, and finally drop every
# row that contains an NA in any column. Because this is a left join, wage
# rows whose `Zip` has no match in `areas` receive NA area columns and are
# therefore removed by na.omit() as well.
join <- left_join(x = wage, y = areas, by = "Zip") %>%
  mutate(count = 1) %>%
  na.omit()
----
“join” data table