library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.0
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(haven)
library(forcats)
# Print the current working directory (the recorded output follows).
getwd()
## [1] "/Users/idlhy/Library/CloudStorage/OneDrive-개인/R FILE"
# Switch to the data folder so the relative path passed to load() resolves.
# NOTE(review): this absolute path is machine-specific; the script fails
# wherever this volume is not mounted under the same name.
setwd("/Volumes/NO NAME/1. Data/CPS/Data Cleaning")
# Restore the objects saved in the .RData file into the global environment.
# Presumably this is where `merged_data`, used by every plot below, comes
# from -- TODO confirm against the file contents.
load("./cpsdata_230402.RData")
# 1. W:M Ratio, Above Poverty Line (1978~2021)
## s_abpov - states
# One panel per state: s_abpov against year_1, points coloured by the ratio
# itself on a diverging gradient centred at 0.95, with a linear fit overlaid.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_abpov, colour = s_abpov)
) +
  geom_point(shape = "circle", size = 1L) +
  geom_smooth(method = lm, colour = "grey30", alpha = 0.4) +
  scale_colour_gradient2(
    low = "blue3", mid = "purple1", high = "red",
    midpoint = 0.95
  ) +
  facet_wrap(vars(state_name)) +
  # Flip the axes so that year runs vertically.
  coord_flip() +
  labs(
    title = "W:M Ratio, Above Poverty Line (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    colour = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Georgia"),
    plot.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'

## s_abpov - Individual State
#' Highlight one state's W:M above-poverty-line ratio against all others.
#'
#' Draws jittered points and one smoothed line per state from the global
#' `merged_data` (columns `year_1`, `s_abpov`, `state_name`). Rows belonging
#' to `state` are drawn in pink at full opacity; every other state is drawn
#' in grey at alpha 0.1. Axes are flipped so that year runs vertically.
#'
#' @param state A single state name, compared with `merged_data$state_name`.
#'   A warning is raised when no row matches, because the plot would
#'   otherwise silently highlight nothing.
#' @return A ggplot object.
abpov_single <- function(state) {
  stopifnot(length(state) == 1L)
  if (!any(merged_data$state_name == state, na.rm = TRUE)) {
    warning(
      "No rows in `merged_data` have state_name == \"", state,
      "\"; nothing will be highlighted.",
      call. = FALSE
    )
  }

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_abpov,
      colour = state_name == state,
      alpha = state_name == state
    )
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 1.02,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values tie each colour/alpha to TRUE/FALSE explicitly instead of
    # relying on level order, which breaks when only one level is present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Same alphas the default discrete alpha scale produced (0.1 and 1), but
    # set explicitly so ggplot2 no longer warns about discrete alpha.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Above Poverty Line (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
abpov_single("District of Columbia")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## s_abpov - Comparing two states
#' Compare two states' W:M above-poverty-line ratios over time.
#'
#' Draws jittered points and one fitted line per state from the global
#' `merged_data` (columns `year_1`, `s_abpov`, `state_name`). `state1` is
#' drawn in dark green and `state2` in red, matching the colours of their
#' text labels; all other states are drawn in faint grey. Axes are flipped
#' so that year runs vertically.
#'
#' @param state1,state2 Single state names, compared with
#'   `merged_data$state_name`. A warning is raised for a name with no rows.
#' @param method Smoothing method for the per-state lines. Defaults to
#'   "lm"; pass e.g. "loess" for a local smooth instead of a straight line.
#' @return A ggplot object.
abpov_compare <- function(state1, state2, method = "lm") {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  for (candidate in list(state1, state2)) {
    if (!any(merged_data$state_name == candidate, na.rm = TRUE)) {
      warning(
        "No rows in `merged_data` have state_name == \"", candidate, "\".",
        call. = FALSE
      )
    }
  }

  selected <- c(state1, state2)

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_abpov,
      # Semantic keys; the named manual scale below turns them into colours.
      colour = ifelse(
        state_name %in% selected,
        ifelse(state_name == state1, "state1", "state2"),
        "other"
      )
    )
  ) +
    geom_jitter(
      aes(alpha = ifelse(state_name %in% selected, 0.5, 0)),
      show.legend = FALSE
    ) +
    geom_line(
      aes(
        group = state_name,
        alpha = ifelse(state_name %in% selected, 1, 0)
      ),
      stat = "smooth",
      method = method,
      show.legend = FALSE
    ) +
    # Named values keep each key's colour fixed even when one of the two
    # states has no rows (unnamed values are assigned by sorted level order).
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    # The mapped alphas are rescaled, not used literally: with this range a
    # mapped 0 is drawn at 0.1, 0.5 at 0.55 and 1 at 1. This matches ggplot2's
    # default continuous alpha scale; the limits are pinned so the result
    # does not depend on which values happen to be present.
    scale_alpha_continuous(range = c(0.1, 1), limits = c(0, 1)) +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 1.02,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 0.93,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Above Poverty Line (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
abpov_compare("Virginia", "Alaska")
## `geom_smooth()` using formula = 'y ~ x'

# 2. W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)
## s_educ - states
# One panel per state: s_educ against year_1, points coloured by the ratio
# itself on a diverging gradient centred at 1, with a linear fit overlaid.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_educ, colour = s_educ)
) +
  geom_point(shape = "circle", size = 1L) +
  geom_smooth(method = lm, colour = "grey30", alpha = 0.4) +
  scale_colour_gradient2(
    low = "blue3", mid = "purple1", high = "red",
    midpoint = 1
  ) +
  facet_wrap(vars(state_name)) +
  # Flip the axes so that year runs vertically.
  coord_flip() +
  labs(
    title = "W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    colour = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Georgia"),
    plot.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'

## s_educ - Individual State
#' Highlight one state's W:M ratio of BA-or-higher attainment.
#'
#' Draws jittered points and one smoothed line per state from the global
#' `merged_data` (columns `year_1`, `s_educ`, `state_name`). Rows belonging
#' to `state` are drawn in pink at full opacity; every other state is drawn
#' in grey at alpha 0.1. Axes are flipped so that year runs vertically.
#'
#' @param state A single state name, compared with `merged_data$state_name`.
#'   A warning is raised when no row matches, because the plot would
#'   otherwise silently highlight nothing.
#' @return A ggplot object.
educ_single <- function(state) {
  stopifnot(length(state) == 1L)
  if (!any(merged_data$state_name == state, na.rm = TRUE)) {
    warning(
      "No rows in `merged_data` have state_name == \"", state,
      "\"; nothing will be highlighted.",
      call. = FALSE
    )
  }

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_educ,
      colour = state_name == state,
      alpha = state_name == state
    )
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 0.7,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values tie each colour/alpha to TRUE/FALSE explicitly instead of
    # relying on level order, which breaks when only one level is present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Same alphas the default discrete alpha scale produced (0.1 and 1), but
    # set explicitly so ggplot2 no longer warns about discrete alpha.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
educ_single("District of Columbia")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## s_educ - Comparing two states
#' Compare two states' W:M ratios of BA-or-higher attainment over time.
#'
#' Draws jittered points and one fitted line per state from the global
#' `merged_data` (columns `year_1`, `s_educ`, `state_name`). `state1` is
#' drawn in dark green and `state2` in red, matching the colours of their
#' text labels; all other states are drawn in faint grey. Axes are flipped
#' so that year runs vertically.
#'
#' @param state1,state2 Single state names, compared with
#'   `merged_data$state_name`. A warning is raised for a name with no rows.
#' @param method Smoothing method for the per-state lines. Defaults to
#'   "lm"; pass e.g. "loess" for a local smooth instead of a straight line.
#' @return A ggplot object.
educ_compare <- function(state1, state2, method = "lm") {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  for (candidate in list(state1, state2)) {
    if (!any(merged_data$state_name == candidate, na.rm = TRUE)) {
      warning(
        "No rows in `merged_data` have state_name == \"", candidate, "\".",
        call. = FALSE
      )
    }
  }

  selected <- c(state1, state2)

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_educ,
      # Semantic keys; the named manual scale below turns them into colours.
      colour = ifelse(
        state_name %in% selected,
        ifelse(state_name == state1, "state1", "state2"),
        "other"
      )
    )
  ) +
    geom_jitter(
      aes(alpha = ifelse(state_name %in% selected, 0.5, 0)),
      show.legend = FALSE
    ) +
    geom_line(
      aes(
        group = state_name,
        alpha = ifelse(state_name %in% selected, 1, 0)
      ),
      stat = "smooth",
      method = method,
      show.legend = FALSE
    ) +
    # Named values keep each key's colour fixed even when one of the two
    # states has no rows (unnamed values are assigned by sorted level order).
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    # The mapped alphas are rescaled, not used literally: with this range a
    # mapped 0 is drawn at 0.1, 0.5 at 0.55 and 1 at 1. This matches ggplot2's
    # default continuous alpha scale; the limits are pinned so the result
    # does not depend on which values happen to be present.
    scale_alpha_continuous(range = c(0.1, 1), limits = c(0, 1)) +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 0.45,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 1.1,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
educ_compare("South Dakota", "Utah")
## `geom_smooth()` using formula = 'y ~ x'

# 3. W:M Ratio, Full-time Employment (Age > 16) (1978~2021)
## s_wkstat - states
# One panel per state: s_wkstat against year_1, points coloured by the ratio
# itself on a diverging gradient centred at 0.7, with a linear fit overlaid.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_wkstat, colour = s_wkstat)
) +
  geom_point(shape = "circle", size = 1L) +
  geom_smooth(method = lm, colour = "grey30", alpha = 0.4) +
  scale_colour_gradient2(
    low = "blue3", mid = "purple1", high = "red",
    midpoint = 0.7
  ) +
  facet_wrap(vars(state_name)) +
  # Flip the axes so that year runs vertically.
  coord_flip() +
  labs(
    title = "W:M Ratio, Full-time Employment (Age > 16) (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    colour = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Georgia"),
    plot.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'

## s_wkstat - Individual State
#' Highlight one state's W:M full-time employment ratio against all others.
#'
#' Draws jittered points and one smoothed line per state from the global
#' `merged_data` (columns `year_1`, `s_wkstat`, `state_name`). Rows belonging
#' to `state` are drawn in pink at full opacity; every other state is drawn
#' in grey at alpha 0.1. Axes are flipped so that year runs vertically.
#'
#' @param state A single state name, compared with `merged_data$state_name`.
#'   A warning is raised when no row matches, because the plot would
#'   otherwise silently highlight nothing.
#' @return A ggplot object.
wkstat_single <- function(state) {
  stopifnot(length(state) == 1L)
  if (!any(merged_data$state_name == state, na.rm = TRUE)) {
    warning(
      "No rows in `merged_data` have state_name == \"", state,
      "\"; nothing will be highlighted.",
      call. = FALSE
    )
  }

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_wkstat,
      colour = state_name == state,
      alpha = state_name == state
    )
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 0.8,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values tie each colour/alpha to TRUE/FALSE explicitly instead of
    # relying on level order, which breaks when only one level is present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Same alphas the default discrete alpha scale produced (0.1 and 1), but
    # set explicitly so ggplot2 no longer warns about discrete alpha.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Full-time Employment (Age > 16) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
wkstat_single("District of Columbia")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## s_wkstat - Comparing two states
#' Compare two states' W:M full-time employment ratios over time.
#'
#' Draws jittered points and one fitted line per state from the global
#' `merged_data` (columns `year_1`, `s_wkstat`, `state_name`). `state1` is
#' drawn in dark green and `state2` in red, matching the colours of their
#' text labels; all other states are drawn in faint grey. Axes are flipped
#' so that year runs vertically.
#'
#' @param state1,state2 Single state names, compared with
#'   `merged_data$state_name`. A warning is raised for a name with no rows.
#' @param method Smoothing method for the per-state lines. Defaults to
#'   "lm"; pass e.g. "loess" for a local smooth instead of a straight line.
#' @return A ggplot object.
wkstat_compare <- function(state1, state2, method = "lm") {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  for (candidate in list(state1, state2)) {
    if (!any(merged_data$state_name == candidate, na.rm = TRUE)) {
      warning(
        "No rows in `merged_data` have state_name == \"", candidate, "\".",
        call. = FALSE
      )
    }
  }

  selected <- c(state1, state2)

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_wkstat,
      # Semantic keys; the named manual scale below turns them into colours.
      colour = ifelse(
        state_name %in% selected,
        ifelse(state_name == state1, "state1", "state2"),
        "other"
      )
    )
  ) +
    geom_jitter(
      aes(alpha = ifelse(state_name %in% selected, 0.5, 0)),
      show.legend = FALSE
    ) +
    geom_line(
      aes(
        group = state_name,
        alpha = ifelse(state_name %in% selected, 1, 0)
      ),
      stat = "smooth",
      method = method,
      show.legend = FALSE
    ) +
    # Named values keep each key's colour fixed even when one of the two
    # states has no rows (unnamed values are assigned by sorted level order).
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    # The mapped alphas are rescaled, not used literally: with this range a
    # mapped 0 is drawn at 0.1, 0.5 at 0.55 and 1 at 1. This matches ggplot2's
    # default continuous alpha scale; the limits are pinned so the result
    # does not depend on which values happen to be present.
    scale_alpha_continuous(range = c(0.1, 1), limits = c(0, 1)) +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 0.45,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 0.75,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Full-time Employment (Age > 16) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
wkstat_compare("North Dakota", "Utah")
## `geom_smooth()` using formula = 'y ~ x'

# 4. W:M Ratio, Self-Employment (Age > 18) (1978~2021)
## s_selfem - states
# One panel per state: s_selfem against year_1, points coloured by the ratio
# itself on a diverging gradient centred at 0.7, with a linear fit overlaid.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_selfem, colour = s_selfem)
) +
  geom_point(shape = "circle", size = 1L) +
  geom_smooth(method = lm, colour = "grey30", alpha = 0.4) +
  scale_colour_gradient2(
    low = "blue3", mid = "purple1", high = "red",
    midpoint = 0.7
  ) +
  # Restrict the ratio axis to 0-1; the recorded output below reports the
  # rows this removes.
  scale_y_continuous(limits = c(0, 1.0), breaks = c(0, 0.5, 1)) +
  facet_wrap(vars(state_name)) +
  # Flip the axes so that year runs vertically.
  coord_flip() +
  labs(
    title = "W:M Ratio, Self-Employment (Age > 18) (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    colour = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = "Georgia"),
    plot.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 33 rows containing missing values (`geom_point()`).

## s_selfem - Individual State
#' Highlight one state's W:M self-employment ratio against all others.
#'
#' Draws jittered points and one smoothed line per state from the global
#' `merged_data` (columns `year_1`, `s_selfem`, `state_name`). Rows belonging
#' to `state` are drawn in pink at full opacity; every other state is drawn
#' in grey at alpha 0.1. The ratio axis is limited to 0-1 and the axes are
#' flipped so that year runs vertically.
#'
#' @param state A single state name, compared with `merged_data$state_name`.
#'   A warning is raised when no row matches, because the plot would
#'   otherwise silently highlight nothing.
#' @return A ggplot object.
selfem_single <- function(state) {
  stopifnot(length(state) == 1L)
  if (!any(merged_data$state_name == state, na.rm = TRUE)) {
    warning(
      "No rows in `merged_data` have state_name == \"", state,
      "\"; nothing will be highlighted.",
      call. = FALSE
    )
  }

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_selfem,
      colour = state_name == state,
      alpha = state_name == state
    )
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 0.8,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values tie each colour/alpha to TRUE/FALSE explicitly instead of
    # relying on level order, which breaks when only one level is present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Same alphas the default discrete alpha scale produced (0.1 and 1), but
    # set explicitly so ggplot2 no longer warns about discrete alpha.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Self-Employment (Age > 18) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    scale_y_continuous(limits = c(0, 1.0), breaks = c(0, 0.5, 1)) +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
selfem_single("District of Columbia")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (`stat_smooth()`).
## Removed 33 rows containing missing values (`geom_point()`).

## s_selfem - Comparing two states
#' Compare two states' W:M self-employment ratios over time.
#'
#' Draws jittered points and one fitted line per state from the global
#' `merged_data` (columns `year_1`, `s_selfem`, `state_name`). `state1` is
#' drawn in dark green and `state2` in red, matching the colours of their
#' text labels; all other states are drawn in faint grey. The ratio axis is
#' limited to 0-1 and the axes are flipped so that year runs vertically.
#'
#' @param state1,state2 Single state names, compared with
#'   `merged_data$state_name`. A warning is raised for a name with no rows.
#' @param method Smoothing method for the per-state lines. Defaults to
#'   "lm"; pass e.g. "loess" for a local smooth instead of a straight line.
#' @return A ggplot object.
selfem_compare <- function(state1, state2, method = "lm") {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  for (candidate in list(state1, state2)) {
    if (!any(merged_data$state_name == candidate, na.rm = TRUE)) {
      warning(
        "No rows in `merged_data` have state_name == \"", candidate, "\".",
        call. = FALSE
      )
    }
  }

  selected <- c(state1, state2)

  ggplot(
    merged_data,
    aes(
      x = year_1, y = s_selfem,
      # Semantic keys; the named manual scale below turns them into colours.
      colour = ifelse(
        state_name %in% selected,
        ifelse(state_name == state1, "state1", "state2"),
        "other"
      )
    )
  ) +
    geom_jitter(
      aes(alpha = ifelse(state_name %in% selected, 0.5, 0)),
      show.legend = FALSE
    ) +
    geom_line(
      aes(
        group = state_name,
        alpha = ifelse(state_name %in% selected, 1, 0)
      ),
      stat = "smooth",
      method = method,
      show.legend = FALSE
    ) +
    # Named values keep each key's colour fixed even when one of the two
    # states has no rows (unnamed values are assigned by sorted level order).
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    # The mapped alphas are rescaled, not used literally: with this range a
    # mapped 0 is drawn at 0.1, 0.5 at 0.55 and 1 at 1. This matches ggplot2's
    # default continuous alpha scale; the limits are pinned so the result
    # does not depend on which values happen to be present.
    scale_alpha_continuous(range = c(0.1, 1), limits = c(0, 1)) +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 0.45,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 0.75,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Self-Employment (Age > 18) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    scale_y_continuous(limits = c(0, 1.0), breaks = c(0, 0.5, 1)) +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
selfem_compare("North Dakota", "Utah")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (`stat_smooth()`).
## Removed 33 rows containing missing values (`geom_point()`).
