# Bring the processed survey objects into the workspace
# (presumably this .RData file provides `land` -- it is used right below)
load("landlord_survey_w_weights.RData")

# Keep only the free-text zip code answer (q22)
land_zip <- select(land, q22)

# Use the row names as a landlord identifier and prepend it as a column
ll_id <- row.names(land_zip)
land_zip <- cbind(ll_id, land_zip)
This is a summary of the zip codes in the SRHS study and how the data were cleaned and categorized.
The zip codes in the SRHS survey were recorded as a free-form list in response to the question:
In which Seattle zip codes do you rent units? (please list all that apply, and separate by semicolons if there are multiple).
Responses varied in format; the following are some examples:
# Show the raw q22 answers of a handful of landlords to illustrate the
# range of response formats
land_zip %>%
  filter(is.element(ll_id, c(1, 1097, 915, 3689, 2918, 360))) %>%
  select(zipcodes = q22) %>%
  head()
## zipcodes
## 1 98102, 98108
## 2 98117-5334
## 3 98126– 98136–98116
## 4 all 3 are in 98112
## 5 98119; 98115; 98117; etc. Too many to list
## 6 98122; 98103; ?
Because zip codes were reported in a variety of formats, I extracted only the 5-digit zip codes and ignored all other text and sub-codes.
In summary, 53 unique zip codes were reported by landlords.
# extract zip codes to different columns without other text
# reg expression didn't work below - comment out, try other approach
# df <- zipcodes %>%
# mutate(zcodes=str_remove_all(zcodes, text_pattern))%>%
# separate(zcodes, paste0("zip",1:20), sep = c(',|;|:|\\.|&|/')) %>%
# mutate(across(where(is.character), ~str_trim(.)))
# extract 5-digit numbers to new column, then separate into many columns
zipcodes <- land_zip %>%
  mutate(
    zcodes = str_extract_all(q22, "\\b\\d{5}\\b") %>%
      # str_extract_all() returns NA for a missing response; drop it so that
      # toString() does not turn it into the literal string "NA"
      map_chr(~ toString(.x[!is.na(.x)])) %>%
      # a response without any 5-digit code collapses to ""; mark it as
      # missing so it is not later counted as one reported zip code
      na_if("")
  ) %>%
  # fill = "right" pads landlords with fewer than 20 codes with NA without
  # warning; more than 20 codes still triggers the default warning
  separate(zcodes, paste0("zip", 1:20), sep = ",", fill = "right") %>%
  mutate(across(where(is.character), ~ str_trim(.)))
# sum number of zip codes reported (zip columns addressed by name rather
# than by position)
zipcodes$z_sum <- as.numeric(rowSums(!is.na(zipcodes[paste0("zip", 1:20)])))
# how many zip codes did landlords report
# knitr::kable(freq(zipcodes$z_sum,
# cumul = FALSE,
# report.nas = FALSE,
# totals = FALSE,
# display.type = FALSE,
# variable.label = "Num. of zipcodes reported"))
# Bar chart of the number of zip codes reported per landlord.
# Total number of landlords: denominator for the percentage labels, so they
# are shares of all landlords rather than of the labelled subset only.
n_landlords <- nrow(zipcodes)

zipcodes %>%
  ggplot(aes(x = z_sum, fill = factor(z_sum))) +
  geom_bar() +
  # Label only the bars of landlords reporting fewer than 5 zip codes
  geom_text(
    data = subset(zipcodes, z_sum < 5),
    aes(label = sprintf(
      "%s (%.0f%%)",
      after_stat(count),
      after_stat(count / n_landlords * 100)
    )),
    stat = "count", hjust = -0.05, vjust = -.25, size = 3
  ) +
  # The bars are coloured through the `fill` aesthetic, so the palette must
  # be set on the fill scale; a colour scale has nothing to act on here.
  # NOTE(review): "BrBG" offers at most 11 colours -- confirm z_sum has no
  # more than 11 distinct values.
  scale_fill_brewer(palette = "BrBG") +
  # One tick per integer count; 0 is included in case some landlords have
  # no zip code recorded
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  theme_classic() +
  labs(x = "Number of zip codes reported by landlords") +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  guides(fill = guide_legend(nrow = 1, label.position = "top"))
To be updated…
A .csv file listing each zip code and how often it was reported can be found here.
# Reshape to one row per landlord / zip-code slot and discard rows that
# contain any missing value
zipcodes <- na.omit(
  pivot_longer(
    zipcodes,
    cols = starts_with("zip"),
    names_to = "zip_num",
    values_to = "zipcode"
  )
)

# Count how often each zip code was reported, least frequent first
zipcodes_table <- sort(table(zipcodes$zipcode))

# Export the frequency table
write.csv(
  zipcodes_table,
  file = "C:/Users/court/Google Drive/Research/SRHS/zipcodes.csv",
  row.names = FALSE
)