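This project uses the dataset from the FiveThirtyEight article “Where Are America’s Librarians?” We load the raw CSV from GitHub, keep a few key columns, rename them, and convert the numeric fields from text to numbers (values suppressed as `**` in the source become `NA`).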
# Raw metro-area librarian employment CSV in the FiveThirtyEight data repo.
data_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/librarians/librarians-by-msa.csv"

# Download the file without the column-type message, then standardise the
# column names (e.g. `prim_state`, `area_name`, `tot_emp`).
raw <- janitor::clean_names(
  readr::read_csv(data_url, show_col_types = FALSE)
)

# Compact column-by-column preview of what was read.
glimpse(raw)
## Rows: 373
## Columns: 6
## $ prim_state <chr> "OH", "WA", "PR", "PR", "PR", "PR", "KY", "NY", "MD", "CT…
## $ area_name <chr> "Springfield, OH", "Mount Vernon-Anacortes, WA", "Aguadil…
## $ tot_emp <chr> "**", "**", "160", "180", "50", "80", "100", "2530", "114…
## $ emp_prse <chr> "**", "**", "4.4", "3.6", "0.2", "1.4", "34", "6.6", "19.…
## $ jobs_1000 <chr> "**", "**", "3.727", "3.434", "2.679", "2.563", "2.099", …
## $ loc_quotient <chr> "**", "**", "3.62", "3.34", "2.6", "2.49", "2.04", "1.99"…
# Build the analysis table: keep five columns under descriptive names and
# convert the three measures from text to numeric.
#
# The raw measures are character columns because suppressed cells are written
# as "**". Declaring "**" as an NA marker lets readr turn those cells into NA
# directly, with no parsing warnings and no blanket suppressWarnings(); any
# other unexpected token will still raise a parsing warning instead of
# silently becoming NA.
cleaned <- raw %>%
  # select() renames while selecting (new_name = old_name).
  select(
    state = prim_state,
    metro_area = area_name,
    total_employed = tot_emp,
    librarians_per_1000 = jobs_1000,
    location_quotient = loc_quotient
  ) %>%
  mutate(
    # parse_number() is kept for the employment count, as in the original.
    total_employed = readr::parse_number(
      total_employed,
      na = c("", "NA", "**")
    ),
    librarians_per_1000 = readr::parse_double(
      librarians_per_1000,
      na = c("", "NA", "**")
    ),
    location_quotient = readr::parse_double(
      location_quotient,
      na = c("", "NA", "**")
    )
  )

# Show the first rows of the cleaned table.
head(cleaned)
## # A tibble: 6 × 5
## state metro_area total_employed librarians_per_1000 location_quotient
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 OH Springfield, OH NA NA NA
## 2 WA Mount Vernon-Anaco… NA NA NA
## 3 PR Aguadilla-Isabela-… 160 3.73 3.62
## 4 PR Ponce, PR 180 3.43 3.34
## 5 PR San German-Cabo Ro… 50 2.68 2.6
## 6 PR Mayaguez, PR 80 2.56 2.49
# Five-number summary, mean and NA count for the three numeric measures.
cleaned %>%
  select(total_employed, librarians_per_1000, location_quotient) %>%
  summary()
## total_employed librarians_per_1000 location_quotient
## Min. : 30.0 Min. :0.163 Min. :0.160
## 1st Qu.: 70.0 1st Qu.:0.819 1st Qu.:0.795
## Median : 110.0 Median :1.024 Median :0.990
## Mean : 305.7 Mean :1.081 Mean :1.050
## 3rd Qu.: 285.0 3rd Qu.:1.270 3rd Qu.:1.230
## Max. :5440.0 Max. :3.727 Max. :3.620
## NA's :2 NA's :2 NA's :2
tidyverse, janitor, rmarkdown, knitr