Step 1: Load necessary libraries

# Load the libraries
library(sf)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(htmltools)
library(tigris)
library(leaflet)
library(htmlwidgets)

Step 2: Load and clean the dataset

# Read the state-level unemployment CSV from disk
data <- read.csv(
  file = "/Users/ogeohia/Downloads/CleanedUnemploymentData.csv"
)

# Coerce the unemployment rate column to numeric (no columns are renamed here)
data_clean <- mutate(
  data,
  Unemployment_rate = as.numeric(Unemployment_rate)
)

# Preview the first rows of the cleaned data
head(data_clean)

##           State Unemployment_rate
## 1  South Dakota               1.9
## 2       Vermont               2.4
## 3  North Dakota               2.5
## 4 New Hampshire               2.6
## 5      Nebraska               2.8
## 6   Connecticut               3.0

Step 3: Download US shapefile data for state boundaries

# Download US state boundaries via tigris.
# cb = TRUE requests the generalized cartographic boundary file instead of the
# detailed TIGER/Line file. progress_bar = FALSE suppresses the download
# progress bar; the argument is spelled out in full rather than relying on
# partial matching of `progress`.
# NOTE(review): the result reuses the name of the tigris function `states()`;
# the name is kept because later code may refer to it.
states <- states(cb = TRUE, progress_bar = FALSE)

# Ensure the boundaries are an 'sf' (simple features) object
# NOTE(review): tigris presumably returns sf by default, which would make this
# conversion a no-op — confirm before removing.
states_sf <- st_as_sf(states)

# Preview the first rows of the boundary data
head(states_sf)

## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
## Geodetic CRS:  NAD83
##   STATEFP  STATENS    AFFGEOID GEOID STUSPS           NAME LSAD        ALAND
## 1      56 01779807 0400000US56    56     WY        Wyoming   00 2.514587e+11
## 2      02 01785533 0400000US02    02     AK         Alaska   00 1.478943e+12
## 3      24 01714934 0400000US24    24     MD       Maryland   00 2.515199e+10
## 4      60 01802701 0400000US60    60     AS American Samoa   00 1.977591e+08
## 5      05 00068085 0400000US05    05     AR       Arkansas   00 1.346608e+11
## 6      38 01779797 0400000US38    38     ND   North Dakota   00 1.786943e+11
##         AWATER                       geometry
## 1   1867503716 MULTIPOLYGON (((-111.0546 4...
## 2 245378425142 MULTIPOLYGON (((179.4825 51...
## 3   6979074857 MULTIPOLYGON (((-76.05015 3...
## 4   1307243751 MULTIPOLYGON (((-168.1458 -...
## 5   3121950081 MULTIPOLYGON (((-94.61792 3...
## 6   4414779956 MULTIPOLYGON (((-104.0487 4...

Step 4: Merge the unemployment data with the shapefile

# Attach each state's unemployment rate to its boundary geometry by matching
# the shapefile's NAME column against the dataset's State column. Every
# boundary row is kept, so areas without a matching rate get NA.
merged_data <- states_sf %>%
  left_join(data_clean, by = c("NAME" = "State"))

# Inspect the first rows of the joined data for discrepancies
head(merged_data)

## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
## Geodetic CRS:  NAD83
##   STATEFP  STATENS    AFFGEOID GEOID STUSPS           NAME LSAD        ALAND
## 1      56 01779807 0400000US56    56     WY        Wyoming   00 2.514587e+11
## 2      02 01785533 0400000US02    02     AK         Alaska   00 1.478943e+12
## 3      24 01714934 0400000US24    24     MD       Maryland   00 2.515199e+10
## 4      60 01802701 0400000US60    60     AS American Samoa   00 1.977591e+08
## 5      05 00068085 0400000US05    05     AR       Arkansas   00 1.346608e+11
## 6      38 01779797 0400000US38    38     ND   North Dakota   00 1.786943e+11
##         AWATER Unemployment_rate                       geometry
## 1   1867503716               3.5 MULTIPOLYGON (((-111.0546 4...
## 2 245378425142               4.7 MULTIPOLYGON (((179.4825 51...
## 3   6979074857               3.1 MULTIPOLYGON (((-76.05015 3...
## 4   1307243751                NA MULTIPOLYGON (((-168.1458 -...
## 5   3121950081               3.4 MULTIPOLYGON (((-94.61792 3...
## 6   4414779956               2.5 MULTIPOLYGON (((-104.0487 4...

# Count the rows that have no unemployment rate after the join
merged_data$Unemployment_rate %>%
  is.na() %>%
  sum()

## [1] 5

# Remove rows with no unemployment rate (e.g. boundaries with no matching row
# in the unemployment dataset)
merged_data <- merged_data %>%
  filter(!is.na(Unemployment_rate))

# Verify that the rows with missing values were removed. This must inspect
# merged_data (the object that was just filtered), not data_clean, which the
# filter above does not touch.
sum(is.na(merged_data$Unemployment_rate))

## [1] 0

# Keep only the columns needed for mapping: state abbreviation, state name,
# unemployment rate, and geometry
merged_data_cleaned <- select(
  merged_data,
  STUSPS, NAME, Unemployment_rate, geometry
)

Step 5: Create the choropleth map and legend

UEL-DS-7002 WEEK 4 - FORMATIVE ASSESSMENT (Question 6)

Ogechukwu Einstein Ohia | R2303D16307933

Step 1: Load necessary libraries

Step 2: Load and clean the dataset

Step 3: Download US shapefile data for state boundaries

Step 4: Merge the unemployment data with the shapefile

Step 5: Create the choropleth map and legend