# Load the incubator listing; empty cells are read as NA.
incubators <- read.csv(
  "incubators_msa_all.csv",
  na.strings = "",
  stringsAsFactors = FALSE
)

# One row per distinct city/state pair among incubators that have an
# area_fips value, plus a "City, ST" label used as the geocoding query.
cities <- unique(
  incubators %>%
    filter(!is.na(area_fips)) %>%
    select(city, state) %>%
    mutate(city_state = paste(city, state, sep = ", "))
)

# Geocode every label. bind_cols() pairs rows purely by position, so this
# assumes geocode() returns one row per query, in query order.
qcew_geo <- geocode(cities$city_state, output = "more")
cities <- bind_cols(cities, qcew_geo)

# Check that every geocoded locality matches the city that was queried.
# Rows where either side is NA (e.g. the geocoder returned no locality) are
# reported as mismatches too: a bare `city != locality` evaluates to NA for
# them, and filter() silently drops NA rows from the report.
cities_check <- cities %>%
  filter(is.na(city) | is.na(locality) | city != locality)
print(paste(nrow(cities_check), "cities don't match"))

if (nrow(cities_check) > 0) {
  print("mismatched cities:")
  # print() explicitly so the rows are shown even when the script is source()d
  print(cities_check)
}

# Save the city list together with its geocoding results.
write.csv(cities, "../bls/qcew/qcew_geo_all.csv", row.names = FALSE, na = "")

# Attach longitude/latitude and the bounding-box columns to each incubator,
# matched on its "City, ST" label. inner_join() keeps only incubators whose
# label is present in `cities`.
incubators_geo <- inner_join(
  mutate(incubators, city_state = paste(city, state, sep = ", ")),
  select(cities, city_state, lon, lat, north, south, east, west),
  by = "city_state"
)

# Save the geocoded incubator table; NA is written as an empty cell.
write.csv(
  incubators_geo,
  "incubators_msa_all_geo.csv",
  na = "",
  row.names = FALSE
)
# Reload the geocoded incubators and read the tech-hub table from disk;
# empty cells are read as NA.
incubators <- read.csv("incubators_msa_all_geo.csv", stringsAsFactors = FALSE, na.strings = "")
techhubs <- read.csv("marketplace_techubs_msa.csv", stringsAsFactors = FALSE, na.strings = "")

# Pair each tech hub that has a tech_focus with the incubators sharing its
# area_fips. Hubs without an area_fips are dropped before the join: dplyr
# joins treat NA keys as equal by default, so such hubs would otherwise be
# matched to every incubator whose area_fips is also NA.
incubators_techhubs <- techhubs %>%
  filter(!is.na(area_fips), !is.na(tech_focus)) %>%
  select(area_fips, tech_focus, description) %>%
  inner_join(incubators, by = "area_fips")

# Save the joined table; NA is written as an empty cell.
write.csv(incubators_techhubs, "incubators_techhubs.csv", row.names = FALSE, na = "")

Companies per Incubator by State

# Reload the geocoded incubators written earlier; empty cells are read as NA.
incubators_geo <- read.csv("incubators_msa_all_geo.csv", stringsAsFactors = FALSE, na.strings = "")

# Incubators over the "usa" map outline: point size = Companies,
# colour = state. Rows with state "HI" are left out of the points.
# Both legends are hidden with guide = "none"; the logical form
# guide = FALSE is deprecated in current ggplot2.
ggplot(data = incubators_geo %>% filter(state != "HI"),
       aes(x = lon, y = lat)) +
  borders("usa", colour = "gray50", fill = "gray50") +
  geom_point(aes(size = Companies, colour = state)) +
  scale_color_discrete(guide = "none") +
  scale_size_continuous(guide = "none")

Midwest Companies per Incubator by City and State

# State codes included in the "midwest" plots.
midwest <- c("CO", "IA", "IL", "MI", "MN", "MO", "NE", "OH", "WI")

# Incubators in the selected states: point size = Companies, colour = the
# "City, ST" label, with labels ordered by descending Companies via reorder().
# The map outline is limited to longitudes -110..-80 and latitudes 35..55;
# xlim is given as c(min, max) rather than in descending order.
# The size legend is hidden with guide = "none" (guide = FALSE is deprecated
# in current ggplot2).
ggplot(data = incubators_geo %>% filter(state %in% midwest),
       aes(x = lon, y = lat)) +
  borders("usa", colour = "gray50", fill = "gray50",
          xlim = c(-110, -80), ylim = c(35, 55)) +
  geom_point(aes(size = Companies,
                 colour = reorder(city_state, -Companies))) +
  scale_color_discrete(name = "location") +
  scale_size_continuous(guide = "none")

# Incubators in the `midwest` states over the full "usa" outline:
# point size = Companies, colour = state (ordered by descending Companies
# via reorder()). The colour legend is titled "state"; the size legend is
# hidden with guide = "none" (guide = FALSE is deprecated in current ggplot2).
ggplot(data = incubators_geo %>% filter(state %in% midwest),
       aes(x = lon, y = lat)) +
  borders("usa", colour = "gray50", fill = "gray50") +
  geom_point(aes(size = Companies, colour = reorder(state, -Companies))) +
  guides(colour = guide_legend(title = "state")) +
  scale_size_continuous(guide = "none")

Tech Hub Industries vs Incubator Companies

# Reload the tech-hub/incubator join written earlier; empty cells are read
# as NA.
incubators_techhubs <- read.csv("incubators_techhubs.csv", stringsAsFactors = FALSE, na.strings = "")

# Incubators located in tech hubs over the "usa" outline:
# point size = Companies, colour = tech_focus (ordered by descending
# Companies via reorder()). The colour legend is titled "industry" and laid
# out in one column; the size legend is hidden with guide = "none"
# (guide = FALSE is deprecated in current ggplot2).
ggplot(data = incubators_techhubs,
       aes(x = lon, y = lat)) +
  borders("usa", colour = "gray50", fill = "gray50") +
  geom_point(aes(size = Companies,
                 colour = reorder(tech_focus, -Companies))) +
  guides(colour = guide_legend(title = "industry", ncol = 1)) +
  scale_size_continuous(guide = "none")

# Dodged bars of the per-location company total (company_sum, the sum of
# Companies within each city_state) for every tech_focus; fill = location,
# ordered by descending company_sum. The fill legend is titled "location"
# and laid out in a single column.
ggplot(
  data = mutate(
    group_by(incubators_techhubs, city_state),
    company_sum = sum(Companies)
  ),
  mapping = aes(
    x = tech_focus,
    y = company_sum,
    fill = reorder(city_state, -company_sum)
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  guides(fill = guide_legend(ncol = 1, title = "location"))

Sources

  1. Nguyen, Janet (May 2017). “Map: Emerging tech hubs and what they’re specializing in.” Marketplace. https://www.marketplace.org/2017/05/24/tech/next-big-tech-hub/steve-case-rise-rest-technology-focus