I saw that some super-awesome open gov developers pushed a whole bunch of data and other spiffy things to GitHub. They even have a web page describing the contents of a good portion of the (as of this post) 38 repositories.

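I started scrolling through them and came across python-us. It’s self-described as “A package for easily working with US and state metadata”, and includes, among other things, FIPS codes, postal and AP-style abbreviations, capitals, statehood years, time zones, contiguous/continental flags, phonetic (metaphone) name lookup, and shapefile access.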

The tigris package does a great job with the shapefile access. We can get 99% of the functionality of the rest of the code by “borrowing” their pre-built SQLite data.db and using dplyr:

library(dplyr)

# Location of the pre-built SQLite database published in the python-us repo
URL <- "https://github.com/unitedstates/python-us/raw/master/data.db"
fil <- basename(URL)

# Download once and reuse the local copy on later runs. mode = "wb" keeps the
# SQLite file byte-exact: without it, download.file() on Windows writes in
# text mode, which corrupts binary files such as this database.
if (!file.exists(fil)) download.file(URL, fil, mode = "wb")

# Open the database and reference its "states" table
states_db <- src_sqlite(fil)
states_tbl <- tbl(states_db, "states")

glimpse(states_tbl)
## Observations: 59
## Variables: 12
## $ fips           (chr) "01", "02", "04", "05", "06", "08", "09", "10",...
## $ name           (chr) "Alabama", "Alaska", "Arizona", "Arkansas", "Ca...
## $ abbr           (chr) "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE",...
## $ is_territory   (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ is_obsolete    (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ is_contiguous  (int) 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,...
## $ is_continental (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,...
## $ statehood_year (int) 1819, 1959, 1912, 1836, 1850, 1876, 1788, 1787,...
## $ capital        (chr) "Montgomery", "Juneau", "Phoenix", "Little Rock...
## $ capital_tz     (chr) "America/Chicago", "America/Anchorage", "Americ...
## $ ap_abbr        (chr) "Ala.", "Alaska", "Ariz.", "Ark.", "Calif.", "C...
## $ time_zones     (chr) "America/Chicago", "America/Anchorage,America/A...
# Rows flagged as territories (is_territory is an integer flag column)
states_tbl %>% filter(is_territory)
## Source: sqlite 3.8.6 [data.db]
## From: states [5 x 12]
## Filter: is_territory 
## 
##    fips                     name  abbr is_territory is_obsolete
##   (chr)                    (chr) (chr)        (int)       (int)
## 1    60           American Samoa    AS            1           0
## 2    66                     Guam    GU            1           0
## 3    69 Northern Mariana Islands    MP            1           0
## 4    72              Puerto Rico    PR            1           0
## 5    78           Virgin Islands    VI            1           0
## Variables not shown: is_contiguous (int), is_continental (int),
##   statehood_year (int), capital (chr), capital_tz (chr), ap_abbr (chr),
##   time_zones (chr)
# Rows flagged as contiguous
states_tbl %>% filter(is_contiguous)
## Source: sqlite 3.8.6 [data.db]
## From: states [51 x 12]
## Filter: is_contiguous 
## 
##     fips                 name  abbr is_territory is_obsolete is_contiguous
##    (chr)                (chr) (chr)        (int)       (int)         (int)
## 1     01              Alabama    AL            0           0             1
## 2     04              Arizona    AZ            0           0             1
## 3     05             Arkansas    AR            0           0             1
## 4     06           California    CA            0           0             1
## 5     08             Colorado    CO            0           0             1
## 6     09          Connecticut    CT            0           0             1
## 7     10             Delaware    DE            0           0             1
## 8     11 District of Columbia    DC            0           0             1
## 9     12              Florida    FL            0           0             1
## 10    13              Georgia    GA            0           0             1
## ..   ...                  ...   ...          ...         ...           ...
## Variables not shown: is_continental (int), statehood_year (int), capital
##   (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr)
# Rows flagged as continental
states_tbl %>% filter(is_continental)
## Source: sqlite 3.8.6 [data.db]
## From: states [52 x 12]
## Filter: is_continental 
## 
##     fips                 name  abbr is_territory is_obsolete is_contiguous
##    (chr)                (chr) (chr)        (int)       (int)         (int)
## 1     01              Alabama    AL            0           0             1
## 2     02               Alaska    AK            0           0             0
## 3     04              Arizona    AZ            0           0             1
## 4     05             Arkansas    AR            0           0             1
## 5     06           California    CA            0           0             1
## 6     08             Colorado    CO            0           0             1
## 7     09          Connecticut    CT            0           0             1
## 8     10             Delaware    DE            0           0             1
## 9     11 District of Columbia    DC            0           0             1
## 10    12              Florida    FL            0           0             1
## ..   ...                  ...   ...          ...         ...           ...
## Variables not shown: is_continental (int), statehood_year (int), capital
##   (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr)
# Rows flagged as obsolete
states_tbl %>% filter(is_obsolete)
## Source: sqlite 3.8.6 [data.db]
## From: states [3 x 12]
## Filter: is_obsolete 
## 
##    fips               name  abbr is_territory is_obsolete is_contiguous
##   (chr)              (chr) (chr)        (int)       (int)         (int)
## 1    NA             Dakota    DK            0           1             1
## 2    NA            Orleans    OL            0           1             1
## 3    NA Philippine Islands    PI            0           1             0
## Variables not shown: is_continental (int), statehood_year (int), capital
##   (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr)

Adding a wrapper for quickly looking up data by state (including phonetic matching via metaphone) would turn this database into a nice, compact package that would pair well with tigris. You could do something like:

library(PGRdup)

# Pull the states table into a local data frame and append the Double
# Metaphone codes (primary / alternate) computed from each name
states_loc <- states_tbl %>%
  collect() %>%
  bind_cols(DoubleMetaphone(.$name))

# Encode a misspelled name and look it up by its primary code
ala <- DoubleMetaphone("Aleabama")
states_loc %>% filter(primary == ala$primary)
## Source: local data frame [1 x 14]
## 
##    fips    name  abbr is_territory is_obsolete is_contiguous
##   (chr)   (chr) (chr)        (int)       (int)         (int)
## 1    01 Alabama    AL            0           0             1
## Variables not shown: is_continental (int), statehood_year (int), capital
##   (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr), primary (chr),
##   alternate (chr)

This seems like it’d be a most excellent “starter” package for a college class or a personal project (for someone learning R).