Overview

This assignment explores the process of using web APIs to acquire data and then load that data into an R data frame.

Get the data

# load necessary supporting packages
library(httr)
library(plyr)
library(dplyr)

# Pieces of the NYT Semantic API geocodes request.
# NOTE(review): url_base ends with "/" and query_header starts with "/", so
# the assembled path contains "//" -- confirm the API tolerates this.
url_base <- "http://api.nytimes.com/svc/semantic/v2/geocodes/"
query_header <- "/query.json?"
query_condition1 <- "country_code=US"
api_key_header <- "api_key="

# The API key is read from a local file rather than hard-coded in the script.
# Only the first line is read and surrounding whitespace is stripped, so a
# trailing blank line or stray space in the file cannot corrupt the request
# URL; warn = FALSE silences the "incomplete final line" warning for files
# saved without a trailing newline.
api_key <- trimws(readLines(
  "C:/Users/cbailey/Desktop/CUNY MSDS/607 Data Aquisition and Management/Week9/nytimes_api_key.csv",
  n = 1,
  warn = FALSE
))

# Stop early with a clear message if the key file was empty.
stopifnot(length(api_key) == 1, nzchar(api_key))

# Construct the API request URL and store the response.
# paste0() concatenates without a separator, replacing paste(..., sep = '').
response <- GET(paste0(
  url_base,
  query_header,
  query_condition1,
  "&",
  api_key_header,
  api_key
))

# Fail immediately on an HTTP error status (e.g. a rejected API key) instead
# of letting a later step fail on an unexpected response body.
stop_for_status(response)
 
# Parse the API response body into nested R lists.
# The `as` value is spelled out in full; "parse" only worked through partial
# matching of "parsed".
response_parsed <- content(response, as = "parsed")

# Element 4 of the parsed results contains the data of interest.
# NOTE(review): positional index -- confirm element 4 is still the records
# list if the API response layout changes.
# Flatten each record into a single vector. lapply() always returns a list,
# whereas sapply() would simplify to a matrix whenever every record happened
# to have the same number of fields, which would break the steps below.
unlisted <- lapply(response_parsed[[4]], unlist)

# Transpose each flattened record into a one-row matrix.
unlisted_trans <- lapply(unlisted, t)

# Convert each one-row matrix into a one-row data frame.
dfs <- lapply(unlisted_trans, data.frame, stringsAsFactors = FALSE)

# Bind the individual data frames (records) together as a single data frame,
# filling in any missing columns with NA.
df_response <- rbind.fill(dfs)

# Check the column names of the combined data frame.
names(df_response)
##  [1] "concept_id"        "concept_name"      "geocode_id"       
##  [4] "geoname_id"        "name"              "latitude"         
##  [7] "longitude"         "elevation"         "population"       
## [10] "country_code"      "country_name"      "admin_code1"      
## [13] "admin_code2"       "admin_name1"       "admin_name2"      
## [16] "feature_class"     "feature_code"      "feature_code_name"
## [19] "time_zone_id"      "dst_offset"        "gmt_offset"       
## [22] "geocodes_created"  "geocodes_updated"
# Tidy the data: convert the coordinates to numeric, rename admin_code1 to
# state_code, and keep only the columns needed for plotting.
# The verbs are namespace-qualified because plyr and dplyr are both attached
# and both export mutate() and rename(); qualifying them removes the
# dependence on which package was attached last.
# (population is not converted here because select() drops it.)
df_response_slim <- df_response %>%
  dplyr::mutate(
    longitude = as.numeric(longitude),
    latitude = as.numeric(latitude)
  ) %>%
  dplyr::rename(state_code = admin_code1) %>%
  dplyr::select(state_code, name, latitude, longitude)

# Display the tidied records.
df_response_slim
##    state_code                     name latitude  longitude
## 1          VA          Charlottesville 38.02931  -78.47668
## 2          PA             Philadelphia 39.95233  -75.16379
## 3          CO San Juan National Forest 37.69166 -107.80895
## 4          MA                Nantucket 41.28346  -70.09946
## 5          OR                  Yamhill 45.34150 -123.18733
## 6          MO               Ohio River 36.98672  -89.13062
## 7          NY                 Bellport 40.75704  -72.93927
## 8          NY            Sleepy Hollow 41.08565  -73.85847
## 9          MA                 Sandwich 41.75899  -70.49392
## 10         LA              New Orleans 29.95465  -90.07507
## 11         NJ                  Clifton 40.85843  -74.16375
## 12         MD                Baltimore 39.29039  -76.61219
## 13         NE                 Nebraska 41.50028  -99.75067
## 14         WI               Fish Creek 45.12777  -87.24705
## 15         NY                   Elmira 42.08980  -76.80773
## 16         CT                  Danbury 41.39482  -73.45401
## 17         VT                  Vermont 44.00034  -72.74983
## 18         CA                     Napa 38.29714 -122.28553
## 19         NY                  Warwick 41.25648  -74.35988
## 20         OR                  Newport 44.63678 -124.05345

Plot the Geographic Points

#load libraries needed for map plotting
library(ggplot2)
library(ggmap)

# Load the USA state outlines.
states <- map_data("state")

# Draw the state polygons, then overlay the API geocode points in red.
ggplot(data = states) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "lightblue",
    color = "black"
  ) +
  coord_fixed(1.3) +       # adjust scales for proper display
  guides(fill = "none") +  # turn off the fill legend; FALSE is deprecated here
  theme_nothing() +        # turn off grid and axes
  geom_point(
    data = df_response_slim,
    aes(x = longitude, y = latitude),
    color = "red",
    size = 2
  )                        # add API geo points

Conclusions

This project demonstrated one method of acquiring web API data and a rather simple way to load it into an R data frame.

Lastly, because the acquired data was geographic, the points were plotted on a map of the USA.