I saw that some super-awesome open gov developers pushed a whole bunch of data and other spiffy things to GitHub. They even have a web page describing the contents of a good portion of the (as of this post) 38 repositories.
I started scrolling through them and came across python-us. It’s self-described as “A package for easily working with US and state metadata”, and includes:
The tigris package does a great job with the shapefile access. We can get 99% of the functionality of the rest of the code by “borrowing” their pre-built SQLite data.db and using dplyr:
library(dplyr)

# Pre-built SQLite database from the python-us repository
URL <- "https://github.com/unitedstates/python-us/raw/master/data.db"
fil <- basename(URL)

# Download only when no local copy exists. mode = "wb" is needed because the
# file is binary: the default text mode can corrupt it on Windows.
if (!file.exists(fil)) {
  download.file(URL, fil, mode = "wb")
}

# Open the database via dplyr and reference its "states" table
states_db <- src_sqlite(fil)
states_tbl <- tbl(states_db, "states")
glimpse(states_tbl)
## Observations: 59
## Variables: 12
## $ fips (chr) "01", "02", "04", "05", "06", "08", "09", "10",...
## $ name (chr) "Alabama", "Alaska", "Arizona", "Arkansas", "Ca...
## $ abbr (chr) "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE",...
## $ is_territory (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ is_obsolete (int) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ is_contiguous (int) 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,...
## $ is_continental (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,...
## $ statehood_year (int) 1819, 1959, 1912, 1836, 1850, 1876, 1788, 1787,...
## $ capital (chr) "Montgomery", "Juneau", "Phoenix", "Little Rock...
## $ capital_tz (chr) "America/Chicago", "America/Anchorage", "Americ...
## $ ap_abbr (chr) "Ala.", "Alaska", "Ariz.", "Ark.", "Calif.", "C...
## $ time_zones (chr) "America/Chicago", "America/Anchorage,America/A...
# Rows whose is_territory flag is set
states_tbl %>% filter(is_territory)
## Source: sqlite 3.8.6 [data.db]
## From: states [5 x 12]
## Filter: is_territory
##
## fips name abbr is_territory is_obsolete
## (chr) (chr) (chr) (int) (int)
## 1 60 American Samoa AS 1 0
## 2 66 Guam GU 1 0
## 3 69 Northern Mariana Islands MP 1 0
## 4 72 Puerto Rico PR 1 0
## 5 78 Virgin Islands VI 1 0
## Variables not shown: is_contiguous (int), is_continental (int),
## statehood_year (int), capital (chr), capital_tz (chr), ap_abbr (chr),
## time_zones (chr)
# Rows whose is_contiguous flag is set
states_tbl %>% filter(is_contiguous)
## Source: sqlite 3.8.6 [data.db]
## From: states [51 x 12]
## Filter: is_contiguous
##
## fips name abbr is_territory is_obsolete is_contiguous
## (chr) (chr) (chr) (int) (int) (int)
## 1 01 Alabama AL 0 0 1
## 2 04 Arizona AZ 0 0 1
## 3 05 Arkansas AR 0 0 1
## 4 06 California CA 0 0 1
## 5 08 Colorado CO 0 0 1
## 6 09 Connecticut CT 0 0 1
## 7 10 Delaware DE 0 0 1
## 8 11 District of Columbia DC 0 0 1
## 9 12 Florida FL 0 0 1
## 10 13 Georgia GA 0 0 1
## .. ... ... ... ... ... ...
## Variables not shown: is_continental (int), statehood_year (int), capital
## (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr)
# Rows whose is_continental flag is set
states_tbl %>% filter(is_continental)
## Source: sqlite 3.8.6 [data.db]
## From: states [52 x 12]
## Filter: is_continental
##
## fips name abbr is_territory is_obsolete is_contiguous
## (chr) (chr) (chr) (int) (int) (int)
## 1 01 Alabama AL 0 0 1
## 2 02 Alaska AK 0 0 0
## 3 04 Arizona AZ 0 0 1
## 4 05 Arkansas AR 0 0 1
## 5 06 California CA 0 0 1
## 6 08 Colorado CO 0 0 1
## 7 09 Connecticut CT 0 0 1
## 8 10 Delaware DE 0 0 1
## 9 11 District of Columbia DC 0 0 1
## 10 12 Florida FL 0 0 1
## .. ... ... ... ... ... ...
## Variables not shown: is_continental (int), statehood_year (int), capital
## (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr)
# Rows whose is_obsolete flag is set
states_tbl %>% filter(is_obsolete)
## Source: sqlite 3.8.6 [data.db]
## From: states [3 x 12]
## Filter: is_obsolete
##
## fips name abbr is_territory is_obsolete is_contiguous
## (chr) (chr) (chr) (int) (int) (int)
## 1 NA Dakota DK 0 1 1
## 2 NA Orleans OL 0 1 1
## 3 NA Philippine Islands PI 0 1 0
## Variables not shown: is_continental (int), statehood_year (int), capital
## (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr)
Adding a wrapper for quickly looking up data by state (including metaphone matching) would enable this database to become a nice, compact package that would pair well with tigris. You could do something like:
library(PGRdup)

# Pull the states table into a local data frame
states_loc <- states_tbl %>% collect()

# Append the Double Metaphone encodings of each state name as extra columns
states_loc <- bind_cols(states_loc, DoubleMetaphone(states_loc$name))

# Encode a misspelled name and keep the rows whose primary code matches it
ala <- DoubleMetaphone("Aleabama")
states_loc %>% filter(primary == ala$primary)
## Source: local data frame [1 x 14]
##
## fips name abbr is_territory is_obsolete is_contiguous
## (chr) (chr) (chr) (int) (int) (int)
## 1 01 Alabama AL 0 0 1
## Variables not shown: is_continental (int), statehood_year (int), capital
## (chr), capital_tz (chr), ap_abbr (chr), time_zones (chr), primary (chr),
## alternate (chr)
This seems like it’d be a most excellent “starter” package for a college class or a personal project (for someone learning R).