Step 1: Load the necessary libraries
# Load the libraries
library(sf)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(htmltools)
library(tigris)
library(leaflet)
library(htmlwidgets)
Step 2: Load and clean the dataset
# Read the unemployment CSV into a data frame
data <- read.csv("/Users/ogeohia/Downloads/CleanedUnemploymentData.csv")
# Coerce the unemployment rate column to numeric (no columns are renamed)
data_clean <- mutate(data, Unemployment_rate = as.numeric(Unemployment_rate))
# Preview the first rows of the cleaned data
head(data_clean)
## State Unemployment_rate
## 1 South Dakota 1.9
## 2 Vermont 2.4
## 3 North Dakota 2.5
## 4 New Hampshire 2.6
## 5 Nebraska 2.8
## 6 Connecticut 3.0
Step 3: Download US shapefile data for state boundaries
# Download the cartographic-boundary US state polygons via tigris.
# `progress_bar = FALSE` suppresses the download progress bar; the argument
# name is written in full rather than relying on partial argument matching.
# NOTE: the variable `states` shares its name with tigris::states(); calls to
# states() still resolve to the function because R skips non-function
# bindings when looking up a call.
states <- states(cb = TRUE, progress_bar = FALSE)
# Ensure the result is an 'sf' (simple features) object for the later join
states_sf <- st_as_sf(states)
# Preview the shapefile attributes and geometry
head(states_sf)
## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
## Geodetic CRS: NAD83
## STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND
## 1 56 01779807 0400000US56 56 WY Wyoming 00 2.514587e+11
## 2 02 01785533 0400000US02 02 AK Alaska 00 1.478943e+12
## 3 24 01714934 0400000US24 24 MD Maryland 00 2.515199e+10
## 4 60 01802701 0400000US60 60 AS American Samoa 00 1.977591e+08
## 5 05 00068085 0400000US05 05 AR Arkansas 00 1.346608e+11
## 6 38 01779797 0400000US38 38 ND North Dakota 00 1.786943e+11
## AWATER geometry
## 1 1867503716 MULTIPOLYGON (((-111.0546 4...
## 2 245378425142 MULTIPOLYGON (((179.4825 51...
## 3 6979074857 MULTIPOLYGON (((-76.05015 3...
## 4 1307243751 MULTIPOLYGON (((-168.1458 -...
## 5 3121950081 MULTIPOLYGON (((-94.61792 3...
## 6 4414779956 MULTIPOLYGON (((-104.0487 4...
Step 4: Merge the unemployment data with the shapefile
# Attach each state's unemployment rate to its polygon, matching the
# shapefile's NAME column against the dataset's State column; every
# polygon is kept, and unmatched ones get NA for Unemployment_rate
merged_data <- states_sf %>%
  left_join(data_clean, by = c("NAME" = "State"))
# Inspect the first rows of the join result for discrepancies
head(merged_data)
## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
## Geodetic CRS: NAD83
## STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND
## 1 56 01779807 0400000US56 56 WY Wyoming 00 2.514587e+11
## 2 02 01785533 0400000US02 02 AK Alaska 00 1.478943e+12
## 3 24 01714934 0400000US24 24 MD Maryland 00 2.515199e+10
## 4 60 01802701 0400000US60 60 AS American Samoa 00 1.977591e+08
## 5 05 00068085 0400000US05 05 AR Arkansas 00 1.346608e+11
## 6 38 01779797 0400000US38 38 ND North Dakota 00 1.786943e+11
## AWATER Unemployment_rate geometry
## 1 1867503716 3.5 MULTIPOLYGON (((-111.0546 4...
## 2 245378425142 4.7 MULTIPOLYGON (((179.4825 51...
## 3 6979074857 3.1 MULTIPOLYGON (((-76.05015 3...
## 4 1307243751 NA MULTIPOLYGON (((-168.1458 -...
## 5 3121950081 3.4 MULTIPOLYGON (((-94.61792 3...
## 6 4414779956 2.5 MULTIPOLYGON (((-104.0487 4...
# Count polygons that have no unemployment rate after the join
sum(is.na(merged_data$Unemployment_rate))
## [1] 5
# Drop rows whose Unemployment_rate is NA (polygons with no match in the
# dataset, plus any values as.numeric() could not parse during cleaning)
merged_data <- merged_data %>%
  filter(!is.na(Unemployment_rate))
# Verify the filter on the merged data itself; checking data_clean here
# would say nothing about whether merged_data still contains NA rows
sum(is.na(merged_data$Unemployment_rate))
## [1] 0
# Keep only the state abbreviation, state name, unemployment rate,
# and polygon geometry
merged_data_cleaned <- select(
  merged_data,
  STUSPS, NAME, Unemployment_rate, geometry
)
Step 5: Create the choropleth map and legend