# Load the incubator table; empty CSV fields are read as NA.
incubators <- read.csv(
  "incubators_msa_all.csv",
  stringsAsFactors = FALSE,
  na.strings = ""
)

# One row per distinct city/state pair among incubators that have an
# area_fips value, plus a "city, state" string used as the geocoding query.
cities <- incubators %>%
  filter(!is.na(area_fips)) %>%
  mutate(city_state = paste(city, state, sep = ", ")) %>%
  select(city, state, city_state) %>%
  unique()

# Geocode each query string and attach the result columns. bind_cols()
# matches rows purely by position, so the geocoder output must line up
# one-to-one with `cities`.
qcew_geo <- geocode(cities$city_state, output = "more")
cities <- bind_cols(cities, qcew_geo)
# Sanity check: the locality returned by the geocoder should equal the city
# name that was queried. Rows whose locality is NA are reported too; a plain
# `city != locality` comparison evaluates to NA for them and filter() would
# silently drop those rows from the report.
cities_check <- cities %>%
  filter(is.na(locality) | city != locality)
print(paste(nrow(cities_check), "cities don't match"))
if (nrow(cities_check) > 0) {
  print("mismatched cities:")
  # Explicit print() so the rows are shown even when the script is source()d.
  print(cities_check)
}
# Persist the geocoded city table (NA values are written as empty fields).
write.csv(
  cities,
  "../bls/qcew/qcew_geo_all.csv",
  row.names = FALSE,
  na = ""
)

# Attach coordinates and bounding-box columns to every incubator row by
# matching on the "city, state" key. The inner join drops incubators whose
# key has no row in `cities`.
incubators_geo <- inner_join(
  mutate(incubators, city_state = paste(city, state, sep = ", ")),
  select(cities, city_state, lon, lat, north, south, east, west),
  by = "city_state"
)
write.csv(
  incubators_geo,
  "incubators_msa_all_geo.csv",
  row.names = FALSE,
  na = ""
)
# Reload the geocoded incubator table from disk; this overwrites the
# `incubators` data frame read earlier in the script.
incubators <- read.csv(
  "incubators_msa_all_geo.csv",
  stringsAsFactors = FALSE,
  na.strings = ""
)
techhubs <- read.csv(
  "marketplace_techubs_msa.csv",
  stringsAsFactors = FALSE,
  na.strings = ""
)

# Pair each tech hub (area_fips, tech_focus, description) with the incubators
# sharing its area_fips, then keep only rows that have a tech focus.
incubators_techhubs <- inner_join(
  select(techhubs, area_fips, tech_focus, description),
  incubators,
  by = "area_fips"
) %>%
  filter(!is.na(tech_focus))
write.csv(
  incubators_techhubs,
  "incubators_techhubs.csv",
  row.names = FALSE,
  na = ""
)
# Companies per Incubator by State
# Map of incubators over the US outline: one point per incubator row, sized
# by Companies and coloured by state. Rows with state "HI" are excluded.
incubators_geo <- read.csv(
  "incubators_msa_all_geo.csv",
  stringsAsFactors = FALSE,
  na.strings = ""
)
ggplot(
  data = incubators_geo %>% filter(state != "HI"),
  aes(x = lon, y = lat)
) +
  borders("usa", colour = "gray50", fill = "gray50") +
  geom_point(aes(size = Companies, colour = state)) +
  # Both legends are hidden. guide = "none" replaces the deprecated
  # guide = FALSE spelling.
  scale_color_discrete(guide = "none") +
  scale_size_continuous(guide = "none")

# Midwest Companies per Incubator by City and State
# State abbreviations treated as "midwest" by the plots in this section.
midwest <- c("CO", "IA", "IL", "MI", "MN", "MO", "NE", "OH", "WI")

# Incubators in the selected states, sized by Companies and coloured by
# "city, state" (legend ordered by descending Companies).
ggplot(
  data = incubators_geo %>% filter(state %in% midwest),
  aes(x = lon, y = lat)
) +
  # Limits are given as c(min, max); the original xlim was in descending
  # order (-80, -110).
  borders(
    "usa",
    colour = "gray50",
    fill = "gray50",
    xlim = c(-110, -80),
    ylim = c(35, 55)
  ) +
  geom_point(aes(size = Companies, colour = reorder(city_state, -Companies))) +
  scale_color_discrete(name = "location") +
  # guide = "none" replaces the deprecated guide = FALSE spelling.
  scale_size_continuous(guide = "none")

# Same selection of states as above, drawn over the full US outline and
# coloured by state (legend ordered by descending Companies).
ggplot(
  data = incubators_geo %>% filter(state %in% midwest),
  aes(x = lon, y = lat)
) +
  borders("usa", colour = "gray50", fill = "gray50") +
  geom_point(aes(size = Companies, colour = reorder(state, -Companies))) +
  guides(colour = guide_legend(title = "state")) +
  # guide = "none" replaces the deprecated guide = FALSE spelling.
  scale_size_continuous(guide = "none")

# Tech Hub Industries vs Incubator Companies
# Map of incubators located in tech-hub areas, sized by Companies and
# coloured by the hub's tech focus (legend ordered by descending Companies,
# laid out in a single column).
incubators_techhubs <- read.csv(
  "incubators_techhubs.csv",
  stringsAsFactors = FALSE,
  na.strings = ""
)
ggplot(
  data = incubators_techhubs,
  aes(x = lon, y = lat)
) +
  borders("usa", colour = "gray50", fill = "gray50") +
  geom_point(aes(size = Companies, colour = reorder(tech_focus, -Companies))) +
  guides(colour = guide_legend(title = "industry", ncol = 1)) +
  # guide = "none" replaces the deprecated guide = FALSE spelling.
  scale_size_continuous(guide = "none")

# Dodged bar chart of companies per tech focus. company_sum is the total of
# Companies within each city_state group, repeated on every row of that
# group; bars are filled by location, ordered by descending company_sum.
incubators_techhubs %>%
  group_by(city_state) %>%
  mutate(company_sum = sum(Companies)) %>%
  ggplot(
    aes(
      x = tech_focus,
      y = company_sum,
      fill = reorder(city_state, -company_sum)
    )
  ) +
  geom_bar(stat = "identity", position = "dodge") +
  guides(fill = guide_legend(title = "location", ncol = 1))
