Analysis of Gender Difference in Unemployment Rate

Data

Using American Community Survey data, I compare unemployment rates by the respondent’s gender. Both spatial and non-spatial visualizations are used in order to see which one explains the differences better.

library(readr)
library(dplyr)
library(magrittr)
library(ggplot2)

# Read the survey extract; the first line of the file supplies the column
# names.
unemp <- read_csv(
  file = "C:/Users/jw/Downloads/R12145474_SL050.csv",
  col_names = TRUE
)

## Parsed with column specification:
## cols(
##   .default = col_character()
## )

## See spec(...) for full column specifications.

# Clean the raw extract:
#   1. drop the first data row (every column was parsed as character;
#      presumably this row is a second header line -- confirm against the
#      raw file),
#   2. keep only the identifier and labour-force counts under short names,
#   3. convert the counts from text to numbers (GEOID stays character).
unemp <- unemp %>%
  slice(-1) %>%
  select(
    GEOID = FIPS,
    total_pop = "Total Population",
    labor_force = "In Labor Force 16 Years and Over:",
    labor_force_armed = "In Labor Force 16 Years and Over: in Armed Forces",
    labor_force_civil = "In Labor Force 16 Years and Over: Civilian",
    male_labor = "Civilian Male in Labor Force 16 Years and Over:",
    male_labor_emp = "Civilian Male in Labor Force 16 Years and Over: Employed",
    male_labor_unemp = "Civilian Male in Labor Force 16 Years and Over: Unemployed",
    female_labor = "Civilian Female in Labor Force 16 Years and Over:",
    female_labor_emp = "Civilian Female in Labor Force 16 Years and Over: Employed",
    female_labor_unemp = "Civilian Female in Labor Force 16 Years and Over: Unemployed"
  ) %>%
  mutate(
    total_pop = as.numeric(total_pop),
    labor_force = as.integer(labor_force),
    labor_force_armed = as.integer(labor_force_armed),
    labor_force_civil = as.integer(labor_force_civil),
    male_labor = as.integer(male_labor),
    male_labor_emp = as.integer(male_labor_emp),
    male_labor_unemp = as.integer(male_labor_unemp),
    female_labor = as.integer(female_labor),
    female_labor_emp = as.integer(female_labor_emp),
    female_labor_unemp = as.integer(female_labor_unemp)
  )

# Quick look at the first rows and the per-column distributions.
head(unemp)

summary(unemp)

##     GEOID             total_pop         labor_force      labor_force_armed
##  Length:3220        Min.   :      74   Min.   :     39   Min.   :    0.0  
##  Class :character   1st Qu.:   11214   1st Qu.:   4915   1st Qu.:    0.0  
##  Mode  :character   Median :   25848   Median :  11472   Median :    8.0  
##                     Mean   :  100768   Mean   :  50760   Mean   :  318.7  
##                     3rd Qu.:   66608   3rd Qu.:  31106   3rd Qu.:   49.0  
##                     Max.   :10105722   Max.   :5215695   Max.   :72524.0  
##  labor_force_civil   male_labor      male_labor_emp    male_labor_unemp  
##  Min.   :     39   Min.   :     19   Min.   :     19   Min.   :     0.0  
##  1st Qu.:   4910   1st Qu.:   2648   1st Qu.:   2443   1st Qu.:   159.0  
##  Median :  11440   Median :   6128   Median :   5656   Median :   427.5  
##  Mean   :  50441   Mean   :  26543   Mean   :  24749   Mean   :  1794.0  
##  3rd Qu.:  30912   3rd Qu.:  16352   3rd Qu.:  15132   3rd Qu.:  1185.2  
##  Max.   :5212243   Max.   :2816484   Max.   :2603083   Max.   :213401.0  
##   female_labor     female_labor_emp  female_labor_unemp
##  Min.   :     20   Min.   :     20   Min.   :     0.0  
##  1st Qu.:   2258   1st Qu.:   2101   1st Qu.:   122.8  
##  Median :   5329   Median :   4937   Median :   334.0  
##  Mean   :  23898   Mean   :  22344   Mean   :  1554.1  
##  3rd Qu.:  14664   3rd Qu.:  13835   3rd Qu.:   996.8  
##  Max.   :2395759   Max.   :2202734   Max.   :193025.0

Retrieving County-level Shapefile using Tigris:

Merging data and excluding peripheral states:

# Bring the join key to a common integer type on both sides, then attach the
# survey columns to the county shapes. A left join keeps every row of
# t_county, matched or not.
unemp$GEOID <- parse_integer(unemp$GEOID)
t_county$GEOID <- parse_integer(t_county$GEOID)
comb_data <- t_county %>%
  left_join(unemp, by = "GEOID")

# Remove the "peripheral" states/territories, identified by STATEFP code.
# Rows with a missing STATEFP are dropped as well.
t_comb_data_sub <- comb_data %>%
  filter(
    !is.na(STATEFP),
    !STATEFP %in% c("02", "15", "60", "66", "69", "72", "78")
  )

Mapping: Gender Differences in the Unemployment Rate by County

library(tmap)
library(tmaptools)
library(RColorBrewer)
# Aggregate the county shapes by STATEFP; the result is drawn as the state
# outline layer on top of the county maps below.
us_states <- aggregate_map(t_comb_data_sub, by = "STATEFP")

# Derive the per-county measures used in the maps and the density plot.
# All of them are element-wise column arithmetic, so the data are left
# ungrouped: grouping by GEOID would not change any value and would leave a
# grouped object behind for the plotting code.
#   emp_diff    - employed men minus employed women
#   proportion  - labour force as a share of total population
#   male_prop   - unemployed men / men in the civilian labour force
#   female_prop - unemployed women / women in the civilian labour force
#   gender_diff - male_prop minus female_prop
t_comb_data_sub <- t_comb_data_sub %>%
  mutate(
    emp_diff = male_labor_emp - female_labor_emp,
    proportion = labor_force / total_pop,
    male_prop = male_labor_unemp / male_labor,
    female_prop = female_labor_unemp / female_labor,
    gender_diff = male_prop - female_prop,
    # Misspelled legacy name, kept so existing references keep working.
    geder_diff = gender_diff
  )

# County choropleths of the male and female unemployment rates (one panel
# per variable, shared fixed breaks), with state outlines drawn on top.
tm_shape(t_comb_data_sub, projection = 2163) +
  tm_polygons(
    col = c("male_prop", "female_prop"),
    title = c("Male Unemployment Rates", "Female Unemployment Rates"),
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    palette = "YlOrRd",
    midpoint = 0,
    border.col = "grey",
    border.alpha = .3
  ) +
  tm_shape(us_states) +
  tm_borders(col = "black", lwd = .28, alpha = 1) +
  tm_layout(
    panel.labels = c(
      "Male Unemployment among Male Labor Force",
      "Female Unemployment among Female Labor Force"
    ),
    legend.position = c("left", "bottom")
  )

Areas with the highest unemployment rates for both males and females are located in the Southwest. However, areas with higher unemployment rates for males are more spread out, while areas with higher unemployment rates for females are more concentrated in the southwestern counties.

Density Plot:

# Overlaid density curves of the county-level unemployment rates:
# light blue = male, pink (semi-transparent) = female.
# Each mean is drawn as ONE vertical line (solid = male, dashed = female).
# The means are computed once, outside aes(), with na.rm = TRUE: mapping
# mean(...) inside aes() would draw one identical line per data row and
# would drop the line entirely if any county had a missing rate.
ggplot(t_comb_data_sub, aes(x = male_prop)) +
  geom_density(fill = "lightblue") +
  geom_vline(xintercept = mean(t_comb_data_sub$male_prop, na.rm = TRUE)) +
  geom_density(aes(x = female_prop), fill = "pink", alpha = .4) +
  geom_vline(
    xintercept = mean(t_comb_data_sub$female_prop, na.rm = TRUE),
    linetype = "dashed"
  ) +
  labs(
    title = "Unemployment Rate by Gender",
    x = "Unemployment Rate among the Same Gender",
    y = "Density"
  )

While the maps above show how the areas with higher unemployment rates for males and females are distributed, the density plot tells us that the mean unemployment rate for females (dashed line) is slightly lower than the mean unemployment rate for males (solid line).

cb=TRUE vs cb=FALSE:

# cb=TRUE
# Male unemployment rate by county on the first set of county shapes,
# with state outlines on top.
tm_shape(t_comb_data_sub, projection = 2163) +
  tm_polygons(
    "male_prop",
    style = "fixed",
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    palette = "YlOrRd",
    border.col = "grey",
    border.alpha = .4
  ) +
  tm_shape(us_states) +
  tm_borders(col = "black", lwd = .36, alpha = 1) +
  tm_layout(
    panel.labels = "cb=TRUE",
    legend.position = c("left", "bottom")
  )

# Repeat the merge for the second set of county shapes (t_county2).
# unemp$GEOID is not parsed here, so it must already be integer for the
# join keys to match.
t_county2$GEOID <- parse_integer(t_county2$GEOID)
comb_data2 <- t_county2 %>%
  left_join(unemp, by = "GEOID")

# Same exclusion of "peripheral" states/territories by STATEFP code; rows
# with a missing STATEFP are dropped as well.
t_comb_data_sub2 <- comb_data2 %>%
  filter(
    !is.na(STATEFP),
    !STATEFP %in% c("02", "15", "60", "66", "69", "72", "78")
  )

# Aggregate the county shapes by STATEFP for the state outline layer.
us_states2 <- aggregate_map(t_comb_data_sub2, by = "STATEFP")

# Male unemployment rate per county: unemployed men divided by men in the
# civilian labour force. This is element-wise arithmetic, so the data are
# left ungrouped; grouping by GEOID would not change any value and would
# hand a grouped object to the map code.
t_comb_data_sub2 <- t_comb_data_sub2 %>%
  mutate(male_prop = male_labor_unemp / male_labor)

# cb=FALSE
# Same map as the cb=TRUE version, drawn on the second set of county
# shapes so the two boundary files can be compared.
tm_shape(t_comb_data_sub2, projection = 2163) +
  tm_polygons(
    "male_prop",
    style = "fixed",
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    palette = "YlOrRd",
    border.col = "grey",
    border.alpha = .4
  ) +
  tm_shape(us_states2) +
  tm_borders(col = "black", lwd = .36, alpha = 1) +
  tm_layout(
    panel.labels = "cb=FALSE",
    legend.position = c("left", "bottom")
  )

When cb=FALSE, more attribute columns are included: NAMELSAD, CLASSFP, MTFCC, CSAFP, CBSAFP, METDIVFP, FUNCSTAT, INTPTLAT, INTPTLON. The map using cb=TRUE draws the geographic boundaries in more detail and more realistically than the map using cb=FALSE. However, the data values shown on the maps do not seem to be affected; thus (at least in this analysis) the final results on the two maps do not seem to differ.