Exploring the Datasets

Here we’ll read in the NIH and population data. The U.S. Department of Health and Human Services provides an online reporting tool to download NIH grant data by location and organization. The data can be downloaded all the way back to 1992 in an Excel format.

#read NIH funding totals by state
nih <- read.csv("2016 NIH funding per state.csv")

#read geocoded NIH funding records by city
nih_by_city <- read.csv("2016 NIH funding by city geocoding result.csv")

#read state population data
pop <- read.csv("2016 state population.csv")

#count city-level records per state/province code
#wrap in factor() because read.csv() returns character columns by default
#since R 4.0, and summary() only tabulates counts for factors
summary(factor(nih_by_city$STATE))
##      AB  AK  AL  AR  AS  AZ  BC  CA  CO  CT  DC  DE  FL  GA  GU  HI  IA 
## 164   3   5  20  12   1  29   4 416  39  33  41   8  61  41   1  11  19 
##  ID  IL  IN  KS  KY  LA  MA  MB  MD  ME  MI  MN  MO  MS  MT  NC  ND  NE 
##   6  76  25  10  26  21 190   2 145   9  43  32  34   6  15  96   3  12 
##  NH  NJ  NM  NV  NY  OH  OK  ON  OR  PA  PQ  PR  QC  RI  SC  SD  TN  TX 
##  14  58  17   6 178  61  14  12  43 103   4  12   2  15  23   6  23 109 
##  UT  VA  VI  VT  WA  WI  WV  WY 
##  30  76   1   6  68  40   5   3

Note that there are Canadian provinces and U.S. territories in the data; I’m going to throw those out in favor of a traditional 50-state layout.

#postal codes of U.S. territories to leave out of the map
excluded_territories <- c("AS", "GU", "VI", "PR")

#flag rows located in the United States whose state code is not a territory
is_us_state <- with(
  nih_by_city,
  COUNTRY_1 == "UNITED STATES" & !is.na(STATE) & !(STATE %in% excluded_territories)
)

#keep only the flagged rows; which() drops rows where the flag is NA
nih_by_city_clean <- nih_by_city[which(is_us_state), , drop = FALSE]

Joining files

For the NIH funding by state, I’m going to bring in a data frame containing population and do a join so I can calculate NIH funding per capita.

#rename the NIH location column so it matches the key column in the population file
names(nih)[names(nih) == "LOCATION"] <- "State"

#inner join NIH funding with population on the state name; naming the key
#explicitly keeps merge() from silently joining on any other shared column
nih_pop <- merge(nih, pop, by = "State")

#add NIH funding per capita, rounded to one decimal place
nih_pop$NIH_perCapita <- round(nih_pop$FUNDING / nih_pop$Population, 1)

Download map data and make a choropleth map

Next, I’m going to download the U.S. map data from Highcharts and use it to make a choropleth map of NIH funding per capita by state.

#fetch the Highcharts U.S. map definition and extract its attribute table
mapdata <- download_map_data("countries/us/us-all") %>%
  get_data_from_map()

#print a compact column-by-column preview of the map attributes
glimpse(mapdata)
## Observations: 52
## Variables: 19
## $ hc-group    <chr> "admin1", "admin1", "admin1", "admin1", "admin1", ...
## $ hc-middle-x <dbl> 0.36, 0.56, 0.51, 0.47, 0.41, 0.43, 0.71, 0.46, 0....
## $ hc-middle-y <dbl> 0.47, 0.52, 0.67, 0.52, 0.38, 0.40, 0.67, 0.38, 0....
## $ hc-key      <chr> "us-ma", "us-wa", "us-ca", "us-or", "us-wi", "us-m...
## $ hc-a2       <chr> "MA", "WA", "CA", "OR", "WI", "ME", "MI", "NV", "N...
## $ labelrank   <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", ...
## $ hasc        <chr> "US.MA", "US.WA", "US.CA", "US.OR", "US.WI", "US.M...
## $ woe-id      <chr> "2347580", "2347606", "2347563", "2347596", "23476...
## $ state-fips  <chr> "25", "53", "6", "41", "55", "23", "26", "32", "35...
## $ fips        <chr> "US25", "US53", "US06", "US41", "US55", "US23", "U...
## $ postal-code <chr> "MA", "WA", "CA", "OR", "WI", "ME", "MI", "NV", "N...
## $ name        <chr> "Massachusetts", "Washington", "California", "Oreg...
## $ country     <chr> "United States of America", "United States of Amer...
## $ region      <chr> "Northeast", "West", "West", "West", "Midwest", "N...
## $ longitude   <chr> "-71.99930000000001", "-120.361", "-119.591", "-12...
## $ woe-name    <chr> "Massachusetts", "Washington", "California", "Oreg...
## $ latitude    <chr> "42.3739", "47.4865", "36.7496", "43.8333", "44.37...
## $ woe-label   <chr> "Massachusetts, US, United States", "Washington, U...
## $ type        <chr> "State", "State", "State", "State", "State", "Stat...
#make choropleth map of states shaded by NIH funding per capita
#joinBy matches the map's "name" property to the "State" column of nih_pop
hcmap(
  "countries/us/us-all",
  data = nih_pop,
  value = "NIH_perCapita",
  joinBy = c("name", "State"),
  name = "2016 NIH Funding Per Capita",
  #label each state with its two-letter abbreviation from the map properties
  dataLabels = list(enabled = TRUE, format = "{point.properties.hc-a2}"),
  borderColor = "#FAFAFA",
  borderWidth = 0.1,
  #map points carry their number in `value` (there is no `y`), so round the
  #tooltip figure with valueDecimals instead of a '{point.y:.0f}' format string
  tooltip = list(valueDecimals = 0)
)

Make a bubble map

Highmaps can also do point-based mapping. In the example below, I’m going to make a bubble map showing the amount of funding per city, indicated by the size of the bubble.

First, I’ll have to do some data cleaning because the NIH by city file contains multiple rows for each city. I’ll use the group_by and summarize functions to dissolve based on city and sum the amount of grant funding.

#aggregate nih_by_city_clean to one row per geocoded address (Match_addr),
#summing funding and keeping the first city, state, and coordinates in each group
nih_by_city_clean_dissolved <- nih_by_city_clean %>%
  group_by(Match_addr) %>%
  summarize(
    total_funding = sum(FUNDING, na.rm = TRUE),
    city = first(City),
    state = first(STATE),
    lat = first(lat),
    lon = first(lon)
  ) %>%
  ungroup() %>%
  #duplicate the funding total as z, the column used for mapping bubble size
  mutate(z = total_funding) %>%
  #drop groups without usable coordinates (missing lat/lon or a blank address)
  #by condition rather than by row position, so a valid city is never removed
  #when no unlocated record happens to sort first
  #dplyr:: is spelled out because stats::filter shares the name
  dplyr::filter(!is.na(lat), !is.na(lon), trimws(Match_addr) != "")

#load the usgeojson dataset used as the base layer of the bubble map
data("usgeojson")

#tooltip body: address in bold, then the funding total as a dollar amount
funding_tooltip <- "<b>{point.Match_addr}:</b> <br> ${point.total_funding:,.0f}"

#start an empty map chart and give it a title
funding_bubble_map <- highchart(type = "map")
funding_bubble_map <- hc_title(funding_bubble_map, text = "NIH Funding Locations")

#base layer: state outlines in grey, hidden from the legend
funding_bubble_map <- hc_add_series(
  funding_bubble_map,
  mapData = usgeojson,
  showInLegend = FALSE,
  nullColor = "#D3D3D3",
  borderColor = "#A0A0A0"
)

#bubble layer: one semi-transparent bubble per funding location
funding_bubble_map <- hc_add_series(
  funding_bubble_map,
  data = nih_by_city_clean_dissolved,
  type = "mapbubble",
  name = "2016 Funding Location",
  minSize = 1,
  maxSize = 30,
  dataLabels = list(enabled = FALSE),
  color = "rgba(57, 86, 139, 0.5)"
)

#HTML tooltip without a header line
funding_bubble_map <- hc_tooltip(
  funding_bubble_map,
  useHTML = TRUE,
  headerFormat = "",
  pointFormat = funding_tooltip
)

#enable zoom/pan controls, then print the chart
funding_bubble_map <- hc_mapNavigation(funding_bubble_map, enabled = TRUE)
funding_bubble_map