Sungji Peter Shin
5.05.2019
Using American Community Survey data, I compare unemployment rates by the respondent’s gender. Both spatial and non-spatial visualizations are used in order to see which one explains the differences better.
library(readr)
library(dplyr)
library(magrittr)
library(ggplot2)
# Read the ACS extract; the first line of the file supplies the column names.
unemp <- read_csv(
  file = "C:/Users/jw/Downloads/R12145474_SL050.csv",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
# Discard the first data row before converting the columns to numbers.
# NOTE(review): presumably a second header/label row in the export - confirm.
unemp <- unemp[-1, ]

# Keep only the columns used in the analysis, giving each a short name as it
# is selected, then convert them from character to numeric types. total_pop
# is stored as double; every labour-force count is stored as integer.
unemp <- unemp %>%
  select(
    GEOID = FIPS,
    total_pop = `Total Population`,
    labor_force = `In Labor Force 16 Years and Over:`,
    labor_force_armed = `In Labor Force 16 Years and Over: in Armed Forces`,
    labor_force_civil = `In Labor Force 16 Years and Over: Civilian`,
    male_labor = `Civilian Male in Labor Force 16 Years and Over:`,
    male_labor_emp = `Civilian Male in Labor Force 16 Years and Over: Employed`,
    male_labor_unemp = `Civilian Male in Labor Force 16 Years and Over: Unemployed`,
    female_labor = `Civilian Female in Labor Force 16 Years and Over:`,
    female_labor_emp = `Civilian Female in Labor Force 16 Years and Over: Employed`,
    female_labor_unemp = `Civilian Female in Labor Force 16 Years and Over: Unemployed`
  ) %>%
  mutate(
    total_pop = as.numeric(total_pop),
    labor_force = as.integer(labor_force),
    labor_force_armed = as.integer(labor_force_armed),
    labor_force_civil = as.integer(labor_force_civil),
    male_labor = as.integer(male_labor),
    male_labor_emp = as.integer(male_labor_emp),
    male_labor_unemp = as.integer(male_labor_unemp),
    female_labor = as.integer(female_labor),
    female_labor_emp = as.integer(female_labor_emp),
    female_labor_unemp = as.integer(female_labor_unemp)
  )

# Quick look at the cleaned table and the range of each column.
head(unemp)
summary(unemp)
## GEOID total_pop labor_force labor_force_armed
## Length:3220 Min. : 74 Min. : 39 Min. : 0.0
## Class :character 1st Qu.: 11214 1st Qu.: 4915 1st Qu.: 0.0
## Mode :character Median : 25848 Median : 11472 Median : 8.0
## Mean : 100768 Mean : 50760 Mean : 318.7
## 3rd Qu.: 66608 3rd Qu.: 31106 3rd Qu.: 49.0
## Max. :10105722 Max. :5215695 Max. :72524.0
## labor_force_civil male_labor male_labor_emp male_labor_unemp
## Min. : 39 Min. : 19 Min. : 19 Min. : 0.0
## 1st Qu.: 4910 1st Qu.: 2648 1st Qu.: 2443 1st Qu.: 159.0
## Median : 11440 Median : 6128 Median : 5656 Median : 427.5
## Mean : 50441 Mean : 26543 Mean : 24749 Mean : 1794.0
## 3rd Qu.: 30912 3rd Qu.: 16352 3rd Qu.: 15132 3rd Qu.: 1185.2
## Max. :5212243 Max. :2816484 Max. :2603083 Max. :213401.0
## female_labor female_labor_emp female_labor_unemp
## Min. : 20 Min. : 20 Min. : 0.0
## 1st Qu.: 2258 1st Qu.: 2101 1st Qu.: 122.8
## Median : 5329 Median : 4937 Median : 334.0
## Mean : 23898 Mean : 22344 Mean : 1554.1
## 3rd Qu.: 14664 3rd Qu.: 13835 3rd Qu.: 996.8
## Max. :2395759 Max. :2202734 Max. :193025.0
# Convert the join key to integer on both sides so the values compare equal.
unemp$GEOID <- parse_integer(unemp$GEOID)
t_county$GEOID <- parse_integer(t_county$GEOID)

# Attach the unemployment columns to every row of t_county; rows of t_county
# without a matching GEOID in unemp get NA in the added columns.
comb_data <- left_join(t_county, unemp, by = "GEOID")

# Drop Alaska (02), Hawaii (15), American Samoa (60), Guam (66), the Northern
# Mariana Islands (69), Puerto Rico (72) and the U.S. Virgin Islands (78) with
# a single membership test instead of seven chained filters.
# Note: `%in%` never yields NA, so a row with a missing STATEFP would now be
# kept rather than dropped.
t_comb_data_sub <- comb_data %>%
  filter(!STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))
library(tmap)
library(tmaptools)
library(RColorBrewer)
# State outlines for the map overlay: aggregate the county shapes by STATEFP.
us_states <- t_comb_data_sub %>%
  aggregate_map(by = "STATEFP")

# Derived per-county measures. Every expression is element-wise, so no
# grouping is needed; leaving the data ungrouped also keeps later verbs from
# silently operating county by county.
#   emp_diff    - employed men minus employed women
#   proportion  - labour force as a share of total population
#   male_prop   - unemployed men as a share of the male civilian labour force
#   female_prop - unemployed women as a share of the female civilian labour force
#   gender_diff - male_prop minus female_prop
t_comb_data_sub <- t_comb_data_sub %>%
  mutate(
    emp_diff = male_labor_emp - female_labor_emp,
    proportion = labor_force / total_pop,
    male_prop = male_labor_unemp / male_labor,
    female_prop = female_labor_unemp / female_labor,
    gender_diff = male_prop - female_prop,
    # Original (misspelled) column name, kept so existing references still work.
    geder_diff = gender_diff
  )
# Side-by-side county maps of male and female unemployment rates on shared
# fixed breaks, with state borders drawn on top.
tm_shape(t_comb_data_sub, projection = 2163) +
  tm_polygons(
    col = c("male_prop", "female_prop"),
    title = c("Male Unemployment Rates", "Female Unemployment Rates"),
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    palette = "YlOrRd",
    midpoint = 0,
    border.col = "grey",
    border.alpha = .3
  ) +
  tm_shape(us_states) +
  tm_borders(lwd = .28, col = "black", alpha = 1) +
  tm_layout(
    panel.labels = c(
      "Male Unemployment among Male Labor Force",
      "Female Unemployment among Female Labor Force"
    ),
    legend.position = c("left", "bottom")
  )
Areas with the highest unemployment rates for both males and females are located in the Southwest. However, areas with higher unemployment rates for males are more spread out, while areas with higher unemployment rates for females are more concentrated in the southwestern counties.
# Overlaid density curves of the county-level unemployment rates: male in
# light blue, female in semi-transparent pink. Each vertical line marks the
# unweighted mean across counties (solid = male, dashed = female).
# The means are passed as constants so exactly one line is drawn per gender
# (mapping them through aes() with `data` draws one line per row), and
# na.rm = TRUE keeps a single missing rate from removing the line.
ggplot(t_comb_data_sub, aes(x = male_prop)) +
  geom_density(fill = "lightblue") +
  geom_vline(xintercept = mean(t_comb_data_sub$male_prop, na.rm = TRUE)) +
  geom_density(aes(x = female_prop), fill = "pink", alpha = .4) +
  geom_vline(
    xintercept = mean(t_comb_data_sub$female_prop, na.rm = TRUE),
    linetype = "dashed"
  ) +
  labs(
    title = "Unemployment Rate by Gender",
    x = "Unemployment Rate among the Same Gender",
    y = "Density"
  )
While the maps above show how the areas with higher unemployment rates for males and females are distributed, the density plot tells us that the mean unemployment rate for females (dashed line) is slightly lower than the mean unemployment rate for males (solid line).
#cb=TRUE
# Male unemployment rate by county using the t_comb_data_sub shapes,
# labelled "cb=TRUE", with state borders drawn on top.
tm_shape(t_comb_data_sub, projection = 2163) +
  tm_polygons(
    "male_prop",
    style = "fixed",
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    palette = "YlOrRd",
    border.col = "grey",
    border.alpha = .4
  ) +
  tm_shape(us_states) +
  tm_borders(lwd = .36, col = "black", alpha = 1) +
  tm_layout(
    panel.labels = "cb=TRUE",
    legend.position = c("left", "bottom")
  )
# Same pipeline as for t_county, applied to the second county layer.
# Convert the join key to integer before joining on it.
t_county2$GEOID <- parse_integer(t_county2$GEOID)

# Attach the unemployment columns to every row of t_county2.
comb_data2 <- left_join(t_county2, unemp, by = "GEOID")

# Drop Alaska (02), Hawaii (15), American Samoa (60), Guam (66), the Northern
# Mariana Islands (69), Puerto Rico (72) and the U.S. Virgin Islands (78) with
# a single membership test instead of seven chained filters.
# Note: `%in%` never yields NA, so a row with a missing STATEFP would now be
# kept rather than dropped.
t_comb_data_sub2 <- comb_data2 %>%
  filter(!STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))

# State outlines for the map overlay: aggregate the county shapes by STATEFP.
us_states2 <- t_comb_data_sub2 %>%
  aggregate_map(by = "STATEFP")

# Male unemployment rate per county. The division is element-wise, so no
# grouping is needed and the result is left ungrouped.
t_comb_data_sub2 <- t_comb_data_sub2 %>%
  mutate(male_prop = male_labor_unemp / male_labor)
# Male unemployment rate by county using the t_comb_data_sub2 shapes,
# labelled "cb=FALSE", with state borders drawn on top.
tm_shape(t_comb_data_sub2, projection = 2163) +
  tm_polygons(
    "male_prop",
    style = "fixed",
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    palette = "YlOrRd",
    border.col = "grey",
    border.alpha = .4
  ) +
  tm_shape(us_states2) +
  tm_borders(lwd = .36, col = "black", alpha = 1) +
  tm_layout(
    panel.labels = "cb=FALSE",
    legend.position = c("left", "bottom")
  )
When cb=FALSE, the county data include additional attribute columns: NAMELSAD, CLASSFP, MTFCC, CSAFP, CBSAFP, METDIVFP, FUNCSTAT, INTPTLAT, and INTPTLON. The map using cb=TRUE draws the geographic boundaries in more detail and more realistically than the map using cb=FALSE. However, the plotted values do not appear to be affected; thus, at least in this analysis, the final results on the two maps do not seem to differ.