We will create a map that shows, for each US state, the most commonly spoken language other than English and Spanish. The map is essentially a recreation of the one found at the following link, but built with the “leaflet” library:
The data is taken from the United States Census Bureau:
https://www.census.gov/programs-surveys/acs/
The information about longitude and latitude of the states is taken from here:
library(leaflet)
Overview of the geographic information used for creating the pins:
# Load the per-state latitude/longitude table. The file has no header row,
# so the column names are assigned explicitly.
ll <- read.csv("state_latlon_inkplant.csv", header = FALSE)
colnames(ll) <- c("Geography", "Latitude", "Longitude")
# Drop the regions that are not shown on the map in a single step. `%in%`
# never yields NA, so a missing Geography value cannot turn into an all-NA
# row the way a chained `==` filter would.
excluded_states <- c("District of Columbia", "Alaska", "Hawaii")
ll <- ll[!(ll$Geography %in% excluded_states), ]
head(ll)
## Geography Latitude Longitude
## 1 Alabama 32.80667 -86.79113
## 3 Arizona 33.72976 -111.43122
## 4 Arkansas 34.96970 -92.37312
## 5 California 36.11620 -119.68156
## 6 Colorado 39.05981 -105.31110
## 7 Connecticut 41.59778 -72.75537
Relevant languages:
# Language labels to look for in the census column headers, in the order in
# which they are searched.
languages <- c(
  "Spanish", "French", "Italian", "Portuguese", "German", "Yiddish",
  "Other West Germanic", "Scandinavian", "Greek", "Russian", "Polish",
  "Serbo-Croatian", "Other Slavic", "Armenian", "Persian", "Gujarati",
  "Hindi", "Urdu", "Other Indic", "Other Indo-European", "Chinese",
  "Japanese", "Korean", "Hmong", "Thai", "Laotian", "Vietnamese",
  "Other Asian", "Tagalog", "Other Pacific Island", "Navajo",
  "Other Native North American", "Hungarian", "Arabic", "Hebrew",
  "African", "Other and unspecified"
)
# Read the census table; the first line of the file is skipped so that the
# second line supplies the column headers.
data <- read.csv("ACS_14_5YR_B16001_with_ann.csv", skip = 1)
# TRUE for every column whose name does not contain "well"
# (presumably the English-proficiency breakdown columns -- TODO confirm).
wells <- !grepl("well", colnames(data))
# For each language, find the index of the first "Estimate..." column that
# mentions it and is not a "well" column; NA when no column matches.
# NOTE: language names containing a space or hyphen currently match nothing,
# presumably because read.csv() sanitises the headers (spaces and hyphens
# become dots) while the pattern keeps them literal. Those languages are
# dropped below.
idc <- vapply(languages, function(language) {
  hits <- grep(paste0("^Estimate.*", language), colnames(data))
  hits <- hits[wells[hits]]
  if (length(hits) == 0L) {
    NA_integer_
  } else {
    hits[[1L]]
  }
}, integer(1))
idc <- idc[!is.na(idc)]
# Keep column 3 (renamed to "Geography" below) plus one column per matched
# language, and label each language column with the language name.
data <- data[c(3, idc)]
colnames(data) <- c("Geography", names(idc))
# actually used languages:
colnames(data)
## [1] "Geography" "Spanish" "French" "Italian"
## [5] "Portuguese" "German" "Yiddish" "Scandinavian"
## [9] "Greek" "Russian" "Polish" "Armenian"
## [13] "Persian" "Gujarati" "Hindi" "Urdu"
## [17] "Chinese" "Japanese" "Korean" "Hmong"
## [21] "Thai" "Laotian" "Vietnamese" "Tagalog"
## [25] "Navajo" "Hungarian" "Arabic" "Hebrew"
## [29] "African"
# Remove the regions that are not shown on the map. `%in%` never yields NA,
# so rows with a missing Geography are kept as they are instead of being
# turned into all-NA rows.
excluded_regions <- c("District of Columbia", "Puerto Rico", "Alaska",
                      "Hawaii")
data <- data[!(data$Geography %in% excluded_regions), ]
# Candidate columns are every language except Spanish, selected by name so
# the result does not depend on a hard-coded column range or offset.
candidate_languages <- setdiff(colnames(data), c("Geography", "Spanish"))
# For each row, pick the candidate language with the largest count
# (the first one wins on ties, as with which.max()).
language_winner <- apply(
  as.matrix(data[candidate_languages]),
  1,
  function(counts) {
    candidate_languages[which.max(counts)]
  }
)
library(leaflet)
# Pair every pin with its state's winning language by state name rather than
# by row position, so the popups stay correct even if the two tables differ
# in row order or in which states they contain.
winner_by_state <- as.character(language_winner)[
  match(as.character(ll$Geography), as.character(data$Geography))
]
if (anyNA(winner_by_state)) {
  warning("No language data for some states; their pins are omitted.",
          call. = FALSE)
}
has_winner <- !is.na(winner_by_state)
pins <- ll[has_winner, ]
# Longitude/latitude are still picked up by leaflet from the column names.
pins %>%
  leaflet() %>%
  addTiles() %>%
  addMarkers(popup = winner_by_state[has_winner])
## Assuming 'Longitude' and 'Latitude' are longitude and latitude, respectively