# Load the processed survey data. The `land` data frame used below is
# presumably provided by this .RData file.
load("landlord_survey_w_weights.RData")

# Keep only the free-text zip code response (q22).
land_zip <- select(land, q22)

# Use the row names as a landlord ID and put it in the first column.
ll_id <- rownames(land_zip)
land_zip <- cbind(ll_id, land_zip)

This is a summary of the zip codes in the SRHS study and how the data were cleaned and categorized.



How zip codes were reported

The zip codes in the SRHS survey were recorded as a free-form list in response to the question:

In which Seattle zip codes do you rent units? (please list all that apply, and separate by semicolons if there are multiple).


Responses varied; the following are examples:

# Show the raw q22 text for six hand-picked respondents to illustrate the
# range of response formats.
land_zip %>%
  filter(is.element(ll_id, c(1, 1097, 915, 3689, 2918, 360))) %>%
  select(zipcodes = q22) %>%
  head()
##                                      zipcodes
## 1                                98102, 98108
## 2                                  98117-5334
## 3                          98126– 98136–98116
## 4                          all 3 are in 98112
## 5 98119; 98115; 98117; etc.  Too many to list
## 6                             98122; 98103; ?



How zip codes were cleaned

Because zip codes were reported in a variety of formats, I extracted only the 5-digit zip codes and ignored all other text and sub-codes (e.g., the "-5334" in "98117-5334").

In summary, 53 unique zip codes were reported by landlords.

# extract zip codes to different columns without other text

# reg expression didn't work below - commented out, try other approach

# df <- zipcodes %>% 
#   mutate(zcodes=str_remove_all(zcodes, text_pattern))%>% 
#   separate(zcodes, paste0("zip",1:20), sep = c(',|;|:|\\.|&|/')) %>%
#   mutate(across(where(is.character), ~str_trim(.)))


# Extract every standalone 5-digit number from the free-text response, then
# spread the codes across zip1..zip20 (one column per reported code).
zipcodes <- land_zip %>%
  mutate(
    zcodes = str_extract_all(q22, "\\b\\d{5}\\b") %>%
      # A missing response yields NA from str_extract_all(); drop it here,
      # otherwise toString() turns it into the literal text "NA".
      map_chr(~ toString(.x[!is.na(.x)]))
  ) %>%
  # fill = "right" pads short lists with NA without a warning for each row;
  # responses with more than 20 codes still warn before extras are dropped.
  separate(zcodes, paste0("zip", 1:20), sep = ",", fill = "right") %>%
  mutate(across(where(is.character), ~ str_trim(.))) %>%
  # A response with no 5-digit code leaves "" in zip1. Store it as NA so it
  # is not later counted or tabulated as if it were a zip code.
  mutate(across(starts_with("zip"), ~ na_if(., "")))



How many zip codes each landlord reported

  • 74% of landlords reported only one zip code.
  • 91% of landlords reported two or fewer.
  • See the figure below.
# Count, per landlord, how many of the zip1..zip20 columns are not NA.
zipcodes$z_sum <- as.numeric(
  rowSums(!is.na(zipcodes[paste0("zip", 1:20)]))
)

# how many zip codes did landlords report


# knitr::kable(freq(zipcodes$z_sum,
#                 cumul = FALSE,
#                 report.nas = FALSE,
#                 totals = FALSE,
#                 display.type = FALSE,
#                 variable.label = "Num. of zipcodes reported"))





# Bar chart of the number of zip codes reported per landlord. Bars for fewer
# than 5 zip codes are labelled with their count and percentage.
zipcodes %>%
  ggplot(aes(x = z_sum, fill = factor(z_sum))) +
  geom_bar() +
  # Labels are computed from the full data so that sum(count) is the total
  # number of landlords. Labelling from a z_sum < 5 subset made the
  # percentages relative to that subset only. Bars at 5 or more get "".
  geom_text(
    aes(label = after_stat(ifelse(
      x < 5,
      sprintf("%s (%.0f%%)", count, count / sum(count) * 100),
      ""
    ))),
    stat = "count", hjust = -0.05, vjust = -.25, size = 3
  ) +
  # NOTE(review): no colour aesthetic is mapped in this plot, so this scale
  # presumably has no visible effect; scale_fill_brewer() may have been
  # intended, but "BrBG" only provides up to 11 colours -- confirm first.
  scale_color_brewer(palette = "BrBG") +
  scale_x_continuous(breaks = seq(1, 20, 1)) +
  theme_classic() +
  labs(x = "Number of zip codes reported by landlords") +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  guides(fill = guide_legend(nrow = 1, label.position = "top"))



How zip codes were categorized

To be updated…

A .csv file listing each zip code and how often it was reported can be found here.

# Reshape to one row per landlord/zip code pair. Empty slots are dropped by
# values_drop_na, and only well-formed 5-digit codes are kept so placeholder
# text from the column split (such as "" or "NA") never reaches the table.
zipcodes <- zipcodes %>%
  pivot_longer(starts_with("zip"),
               names_to = "zip_num",
               values_to = "zipcode",
               values_drop_na = TRUE) %>%
  filter(str_detect(zipcode, "^\\d{5}$"))

# Frequency of each zip code, sorted from least to most common.
zipcodes_table <- sort(table(zipcodes$zipcode))

# NOTE(review): absolute, machine-specific output path; consider a
# project-relative path so the script can run on other machines.
write.csv(zipcodes_table, "C:/Users/court/Google Drive/Research/SRHS/zipcodes.csv", row.names = FALSE)