library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.0
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(haven)
library(forcats)
getwd()
## [1] "/Users/idlhy/Library/CloudStorage/OneDrive-개인/R FILE"

# Load the cleaned CPS extract through an explicit path instead of calling
# setwd(): the session's working directory is left untouched, and a missing
# volume or file fails immediately with a clear message.
# NOTE(review): the directory is machine-specific; adjust `cps_data_dir` when
# running elsewhere.
cps_data_dir <- "/Volumes/NO NAME/1. Data/CPS/Data Cleaning"
cps_data_file <- file.path(cps_data_dir, "cpsdata_230402.RData")
if (!file.exists(cps_data_file)) {
  stop("CPS data file not found: ", cps_data_file, call. = FALSE)
}
load(cps_data_file)

# Every plot below reads `merged_data`; stop here if the .RData file did not
# provide it rather than failing later inside ggplot().
if (!exists("merged_data")) {
  stop("`merged_data` was not found in ", cps_data_file, call. = FALSE)
}
# 1. W:M Ratio, Above Poverty Line (1978~2021)
## s_abpov - states
# Small multiples, one panel per state: yearly s_abpov values as points
# coloured by the value itself, overlaid with a linear trend per panel.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_abpov, colour = s_abpov)
) +
  geom_point(size = 1L, shape = "circle") +
  geom_smooth(method = "lm", alpha = 0.4, color = "grey30") +
  # Diverging palette centred on a ratio of 0.95.
  scale_color_gradient2(
    midpoint = 0.95,
    low = "blue3",
    mid = "purple1",
    high = "red"
  ) +
  facet_wrap(vars(state_name)) +
  # Year is mapped to x but displayed on the vertical axis.
  coord_flip() +
  labs(
    title = "W:M Ratio, Above Poverty Line (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    color = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    text = element_text(family = "Georgia")
  )
## `geom_smooth()` using formula = 'y ~ x'

## s_abpov - Individual State 
#' Highlight one state's W:M ratio (above poverty line) against all others
#'
#' Draws jittered yearly points and one smoothed trend line per state. Rows
#' whose `state_name` equals `state` are drawn in pink at full opacity; every
#' other state is drawn in faint grey. The state name is printed as a text
#' label at a fixed position (year 2000, ratio 1.02).
#'
#' @param state Single state name, matched exactly against `state_name`.
#' @param data Data frame with columns `year_1`, `s_abpov` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
abpov_single <- function(state, data = merged_data) {
  stopifnot(length(state) == 1L)
  if (!state %in% data$state_name) {
    warning(
      "No rows in `data` have state_name == \"", state,
      "\"; no state will be highlighted.",
      call. = FALSE
    )
  }

  # Compute the highlight flag outside aes() so the `state` argument can never
  # be shadowed by a data column that happens to be called `state`.
  data$.highlight <- data$state_name == state

  ggplot(
    data,
    aes(x = year_1, y = s_abpov, colour = .highlight, alpha = .highlight)
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 1.02,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values: the mapping no longer depends on which levels are present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Explicit alpha scale. 0.1 / 1 are the values ggplot2's default discrete
    # alpha scale assigned to two levels, so the rendering is unchanged while
    # the "Using alpha for a discrete variable" warning disappears.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Above Poverty Line (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
abpov_single("District of Columbia")
## Warning: Using alpha for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## s_abpov - Comparing two states
#' Compare two states' W:M ratio (above poverty line) against all others
#'
#' Draws jittered yearly points and one linear trend line per state.
#' `state1` is drawn (and labelled) in dark green, `state2` in red, and every
#' other state in faint grey. Labels sit at fixed positions: `state2` at
#' (year 2000, ratio 1.02) and `state1` at (year 1995, ratio 0.93).
#'
#' @param state1,state2 Single state names, matched exactly against
#'   `state_name`.
#' @param data Data frame with columns `year_1`, `s_abpov` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
abpov_compare <- function(state1, state2, data = merged_data) {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  not_found <- setdiff(c(state1, state2), data$state_name)
  if (length(not_found) > 0) {
    warning(
      "No rows in `data` for state(s): ", paste(not_found, collapse = ", "),
      call. = FALSE
    )
  }

  # Opacities, applied literally through scale_alpha_identity(). The previous
  # code mapped 0 / 0.5 / 1 inside aes(), which ggplot2's default continuous
  # alpha scale rescaled to 0.1 / 0.55 / 1; those rendered values are kept.
  alpha_other <- 0.1
  alpha_points <- 0.55
  alpha_lines <- 1

  # Role of each row, computed outside aes() so the arguments cannot be
  # shadowed by data columns. %in% treats a missing state_name as "other".
  data$.role <- ifelse(
    data$state_name %in% state1, "state1",
    ifelse(data$state_name %in% state2, "state2", "other")
  )
  is_other <- data$.role == "other"
  data$.alpha_points <- ifelse(is_other, alpha_other, alpha_points)
  data$.alpha_lines <- ifelse(is_other, alpha_other, alpha_lines)

  ggplot(data, aes(x = year_1, y = s_abpov, colour = .role)) +
    geom_jitter(aes(alpha = .alpha_points), show.legend = FALSE) +
    geom_line(
      aes(group = state_name, alpha = .alpha_lines),
      stat = "smooth",
      method = "lm", # If you want to just "smooth", remove this line
      show.legend = FALSE
    ) +
    # Named values tie each role to its colour. The previous unnamed vector
    # was paired with the alphabetically sorted keys, which matched the label
    # colours only by coincidence and shifted when a state had no rows.
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    scale_alpha_identity() +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 1.02,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 0.93,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Above Poverty Line (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
abpov_compare("Virginia", "Alaska")
## `geom_smooth()` using formula = 'y ~ x'

# 2. W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)
## s_educ - states
# Small multiples, one panel per state: yearly s_educ values as points
# coloured by the value itself, overlaid with a linear trend per panel.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_educ, colour = s_educ)
) +
  geom_point(size = 1L, shape = "circle") +
  geom_smooth(method = "lm", alpha = 0.4, color = "grey30") +
  # Diverging palette centred on parity (ratio of 1).
  scale_color_gradient2(
    midpoint = 1,
    low = "blue3",
    mid = "purple1",
    high = "red"
  ) +
  facet_wrap(vars(state_name)) +
  # Year is mapped to x but displayed on the vertical axis.
  coord_flip() +
  labs(
    title = "W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    color = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    text = element_text(family = "Georgia")
  )
## `geom_smooth()` using formula = 'y ~ x'

## s_educ - Individual State 
#' Highlight one state's W:M ratio (BA degree or higher) against all others
#'
#' Draws jittered yearly points and one smoothed trend line per state. Rows
#' whose `state_name` equals `state` are drawn in pink at full opacity; every
#' other state is drawn in faint grey. The state name is printed as a text
#' label at a fixed position (year 2000, ratio 0.7).
#'
#' @param state Single state name, matched exactly against `state_name`.
#' @param data Data frame with columns `year_1`, `s_educ` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
educ_single <- function(state, data = merged_data) {
  stopifnot(length(state) == 1L)
  if (!state %in% data$state_name) {
    warning(
      "No rows in `data` have state_name == \"", state,
      "\"; no state will be highlighted.",
      call. = FALSE
    )
  }

  # Compute the highlight flag outside aes() so the `state` argument can never
  # be shadowed by a data column that happens to be called `state`.
  data$.highlight <- data$state_name == state

  ggplot(
    data,
    aes(x = year_1, y = s_educ, colour = .highlight, alpha = .highlight)
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 0.7,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values: the mapping no longer depends on which levels are present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Explicit alpha scale. 0.1 / 1 are the values ggplot2's default discrete
    # alpha scale assigned to two levels, so the rendering is unchanged while
    # the "Using alpha for a discrete variable" warning disappears.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
educ_single("District of Columbia")
## Warning: Using alpha for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## s_educ - Comparing two states
#' Compare two states' W:M ratio (BA degree or higher) against all others
#'
#' Draws jittered yearly points and one linear trend line per state.
#' `state1` is drawn (and labelled) in dark green, `state2` in red, and every
#' other state in faint grey. Labels sit at fixed positions: `state2` at
#' (year 2000, ratio 0.45) and `state1` at (year 1995, ratio 1.1).
#'
#' @param state1,state2 Single state names, matched exactly against
#'   `state_name`.
#' @param data Data frame with columns `year_1`, `s_educ` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
educ_compare <- function(state1, state2, data = merged_data) {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  not_found <- setdiff(c(state1, state2), data$state_name)
  if (length(not_found) > 0) {
    warning(
      "No rows in `data` for state(s): ", paste(not_found, collapse = ", "),
      call. = FALSE
    )
  }

  # Opacities, applied literally through scale_alpha_identity(). The previous
  # code mapped 0 / 0.5 / 1 inside aes(), which ggplot2's default continuous
  # alpha scale rescaled to 0.1 / 0.55 / 1; those rendered values are kept.
  alpha_other <- 0.1
  alpha_points <- 0.55
  alpha_lines <- 1

  # Role of each row, computed outside aes() so the arguments cannot be
  # shadowed by data columns. %in% treats a missing state_name as "other".
  data$.role <- ifelse(
    data$state_name %in% state1, "state1",
    ifelse(data$state_name %in% state2, "state2", "other")
  )
  is_other <- data$.role == "other"
  data$.alpha_points <- ifelse(is_other, alpha_other, alpha_points)
  data$.alpha_lines <- ifelse(is_other, alpha_other, alpha_lines)

  ggplot(data, aes(x = year_1, y = s_educ, colour = .role)) +
    geom_jitter(aes(alpha = .alpha_points), show.legend = FALSE) +
    geom_line(
      aes(group = state_name, alpha = .alpha_lines),
      stat = "smooth",
      method = "lm", # If you want to just "smooth", remove this line
      show.legend = FALSE
    ) +
    # Named values tie each role to its colour. The previous unnamed vector
    # was paired with the alphabetically sorted keys, which matched the label
    # colours only by coincidence and shifted when a state had no rows.
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    scale_alpha_identity() +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 0.45,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 1.1,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, BA Degree or Higher (Age > 22) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
educ_compare("South Dakota", "Utah")
## `geom_smooth()` using formula = 'y ~ x'

# 3. W:M Ratio, Full-time Employment (Age > 16) (1978~2021)
## s_wkstat - states
# Small multiples, one panel per state: yearly s_wkstat values as points
# coloured by the value itself, overlaid with a linear trend per panel.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_wkstat, colour = s_wkstat)
) +
  geom_point(size = 1L, shape = "circle") +
  geom_smooth(method = "lm", alpha = 0.4, color = "grey30") +
  # Diverging palette centred on a ratio of 0.7.
  scale_color_gradient2(
    midpoint = 0.7,
    low = "blue3",
    mid = "purple1",
    high = "red"
  ) +
  facet_wrap(vars(state_name)) +
  # Year is mapped to x but displayed on the vertical axis.
  coord_flip() +
  labs(
    title = "W:M Ratio, Full-time Employment (Age > 16) (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    color = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    text = element_text(family = "Georgia")
  )
## `geom_smooth()` using formula = 'y ~ x'

## s_wkstat - Individual State 
#' Highlight one state's W:M ratio (full-time employment) against all others
#'
#' Draws jittered yearly points and one smoothed trend line per state. Rows
#' whose `state_name` equals `state` are drawn in pink at full opacity; every
#' other state is drawn in faint grey. The state name is printed as a text
#' label at a fixed position (year 2000, ratio 0.8).
#'
#' @param state Single state name, matched exactly against `state_name`.
#' @param data Data frame with columns `year_1`, `s_wkstat` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
wkstat_single <- function(state, data = merged_data) {
  stopifnot(length(state) == 1L)
  if (!state %in% data$state_name) {
    warning(
      "No rows in `data` have state_name == \"", state,
      "\"; no state will be highlighted.",
      call. = FALSE
    )
  }

  # Compute the highlight flag outside aes() so the `state` argument can never
  # be shadowed by a data column that happens to be called `state`.
  data$.highlight <- data$state_name == state

  ggplot(
    data,
    aes(x = year_1, y = s_wkstat, colour = .highlight, alpha = .highlight)
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 0.8,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values: the mapping no longer depends on which levels are present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Explicit alpha scale. 0.1 / 1 are the values ggplot2's default discrete
    # alpha scale assigned to two levels, so the rendering is unchanged while
    # the "Using alpha for a discrete variable" warning disappears.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Full-time Employment (Age > 16) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
wkstat_single("District of Columbia")
## Warning: Using alpha for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## s_wkstat - Comparing two states
#' Compare two states' W:M ratio (full-time employment) against all others
#'
#' Draws jittered yearly points and one linear trend line per state.
#' `state1` is drawn (and labelled) in dark green, `state2` in red, and every
#' other state in faint grey. Labels sit at fixed positions: `state2` at
#' (year 2000, ratio 0.45) and `state1` at (year 1995, ratio 0.75).
#'
#' @param state1,state2 Single state names, matched exactly against
#'   `state_name`.
#' @param data Data frame with columns `year_1`, `s_wkstat` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
wkstat_compare <- function(state1, state2, data = merged_data) {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  not_found <- setdiff(c(state1, state2), data$state_name)
  if (length(not_found) > 0) {
    warning(
      "No rows in `data` for state(s): ", paste(not_found, collapse = ", "),
      call. = FALSE
    )
  }

  # Opacities, applied literally through scale_alpha_identity(). The previous
  # code mapped 0 / 0.5 / 1 inside aes(), which ggplot2's default continuous
  # alpha scale rescaled to 0.1 / 0.55 / 1; those rendered values are kept.
  alpha_other <- 0.1
  alpha_points <- 0.55
  alpha_lines <- 1

  # Role of each row, computed outside aes() so the arguments cannot be
  # shadowed by data columns. %in% treats a missing state_name as "other".
  data$.role <- ifelse(
    data$state_name %in% state1, "state1",
    ifelse(data$state_name %in% state2, "state2", "other")
  )
  is_other <- data$.role == "other"
  data$.alpha_points <- ifelse(is_other, alpha_other, alpha_points)
  data$.alpha_lines <- ifelse(is_other, alpha_other, alpha_lines)

  ggplot(data, aes(x = year_1, y = s_wkstat, colour = .role)) +
    geom_jitter(aes(alpha = .alpha_points), show.legend = FALSE) +
    geom_line(
      aes(group = state_name, alpha = .alpha_lines),
      stat = "smooth",
      method = "lm", # If you want to just "smooth", remove this line
      show.legend = FALSE
    ) +
    # Named values tie each role to its colour. The previous unnamed vector
    # was paired with the alphabetically sorted keys, which matched the label
    # colours only by coincidence and shifted when a state had no rows.
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    scale_alpha_identity() +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 0.45,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 0.75,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Full-time Employment (Age > 16) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
wkstat_compare("North Dakota", "Utah")
## `geom_smooth()` using formula = 'y ~ x'

# 4. W:M Ratio, Self-Employment (Age > 18) (1978~2021)
## s_selfem - states
# Small multiples, one panel per state: yearly s_selfem values as points
# coloured by the value itself, overlaid with a linear trend per panel.
ggplot(
  data = merged_data,
  mapping = aes(x = year_1, y = s_selfem, colour = s_selfem)
) +
  geom_point(size = 1L, shape = "circle") +
  geom_smooth(method = "lm", alpha = 0.4, color = "grey30") +
  # Diverging palette centred on a ratio of 0.7.
  scale_color_gradient2(
    midpoint = 0.7,
    low = "blue3",
    mid = "purple1",
    high = "red"
  ) +
  # Ratio axis restricted to [0, 1].
  # NOTE(review): limits set on the scale remove out-of-range rows before the
  # smoother is fitted (the render reports 33 rows removed); if only a visual
  # zoom is wanted, coord_flip(ylim = c(0, 1)) keeps them in the fit - confirm
  # which behaviour is intended.
  scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1.0)) +
  facet_wrap(vars(state_name)) +
  # Year is mapped to x but displayed on the vertical axis.
  coord_flip() +
  labs(
    title = "W:M Ratio, Self-Employment (Age > 18) (1978~2021)",
    subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)",
    x = "Year",
    y = "W:M Ratio",
    color = "W:M Ratio"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    text = element_text(family = "Georgia")
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 33 rows containing missing values (`geom_point()`).

## s_selfem - Individual State 
#' Highlight one state's W:M ratio (self-employment) against all others
#'
#' Draws jittered yearly points and one smoothed trend line per state, with
#' the ratio axis limited to [0, 1]. Rows whose `state_name` equals `state`
#' are drawn in pink at full opacity; every other state is drawn in faint
#' grey. The state name is printed as a text label at a fixed position
#' (year 2000, ratio 0.8).
#'
#' @param state Single state name, matched exactly against `state_name`.
#' @param data Data frame with columns `year_1`, `s_selfem` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
selfem_single <- function(state, data = merged_data) {
  stopifnot(length(state) == 1L)
  if (!state %in% data$state_name) {
    warning(
      "No rows in `data` have state_name == \"", state,
      "\"; no state will be highlighted.",
      call. = FALSE
    )
  }

  # Compute the highlight flag outside aes() so the `state` argument can never
  # be shadowed by a data column that happens to be called `state`.
  data$.highlight <- data$state_name == state

  ggplot(
    data,
    aes(x = year_1, y = s_selfem, colour = .highlight, alpha = .highlight)
  ) +
    geom_jitter(show.legend = FALSE) +
    geom_line(aes(group = state_name), stat = "smooth", show.legend = FALSE) +
    annotate(
      "text",
      label = state,
      x = 2000, y = 0.8,
      size = 4,
      colour = "black",
      family = "Times"
    ) +
    # Named values: the mapping no longer depends on which levels are present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "pink2")) +
    # Explicit alpha scale. 0.1 / 1 are the values ggplot2's default discrete
    # alpha scale assigned to two levels, so the rendering is unchanged while
    # the "Using alpha for a discrete variable" warning disappears.
    scale_alpha_manual(values = c("FALSE" = 0.1, "TRUE" = 1)) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Self-Employment (Age > 18) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    coord_flip() +
    # NOTE(review): scale limits drop out-of-range rows before smoothing;
    # coord_flip(ylim = c(0, 1)) would zoom without dropping - confirm intent.
    scale_y_continuous(limits = c(0, 1.0), breaks = c(0, 0.5, 1)) +
    theme_minimal() +
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
selfem_single("District of Columbia")
## Warning: Using alpha for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (`stat_smooth()`).
## Removed 33 rows containing missing values (`geom_point()`).

## s_selfem - Comparing two states
#' Compare two states' W:M ratio (self-employment) against all others
#'
#' Draws jittered yearly points and one linear trend line per state, with the
#' ratio axis limited to [0, 1]. `state1` is drawn (and labelled) in dark
#' green, `state2` in red, and every other state in faint grey. Labels sit at
#' fixed positions: `state2` at (year 2000, ratio 0.45) and `state1` at
#' (year 1995, ratio 0.75).
#'
#' @param state1,state2 Single state names, matched exactly against
#'   `state_name`.
#' @param data Data frame with columns `year_1`, `s_selfem` and `state_name`.
#'   Defaults to the global `merged_data`.
#' @return A ggplot object.
selfem_compare <- function(state1, state2, data = merged_data) {
  stopifnot(length(state1) == 1L, length(state2) == 1L)
  not_found <- setdiff(c(state1, state2), data$state_name)
  if (length(not_found) > 0) {
    warning(
      "No rows in `data` for state(s): ", paste(not_found, collapse = ", "),
      call. = FALSE
    )
  }

  # Opacities, applied literally through scale_alpha_identity(). The previous
  # code mapped 0 / 0.5 / 1 inside aes(), which ggplot2's default continuous
  # alpha scale rescaled to 0.1 / 0.55 / 1; those rendered values are kept.
  alpha_other <- 0.1
  alpha_points <- 0.55
  alpha_lines <- 1

  # Role of each row, computed outside aes() so the arguments cannot be
  # shadowed by data columns. %in% treats a missing state_name as "other".
  data$.role <- ifelse(
    data$state_name %in% state1, "state1",
    ifelse(data$state_name %in% state2, "state2", "other")
  )
  is_other <- data$.role == "other"
  data$.alpha_points <- ifelse(is_other, alpha_other, alpha_points)
  data$.alpha_lines <- ifelse(is_other, alpha_other, alpha_lines)

  ggplot(data, aes(x = year_1, y = s_selfem, colour = .role)) +
    geom_jitter(aes(alpha = .alpha_points), show.legend = FALSE) +
    geom_line(
      aes(group = state_name, alpha = .alpha_lines),
      stat = "smooth",
      method = "lm", # If you want to just "smooth", remove this line
      show.legend = FALSE
    ) +
    # Named values tie each role to its colour. The previous unnamed vector
    # was paired with the alphabetically sorted keys, which matched the label
    # colours only by coincidence and shifted when a state had no rows.
    scale_colour_manual(
      values = c(state1 = "darkgreen", state2 = "red3", other = "grey")
    ) +
    scale_alpha_identity() +
    annotate(
      "text",
      label = state2,
      x = 2000, y = 0.45,
      size = 5,
      colour = "red3",
      family = "Times"
    ) +
    annotate(
      "text",
      label = state1,
      x = 1995, y = 0.75,
      size = 5,
      colour = "darkgreen",
      family = "Times"
    ) +
    labs(
      x = "Year", y = "W:M Ratio",
      title = "W:M Ratio, Self-Employment (Age > 18) (1978~2021)",
      subtitle = "Data = Current Population Survey (N = 2,058,407)\nMade by Heeyoung(hlee25@albany.edu)"
    ) +
    theme_minimal() +
    # NOTE(review): scale limits drop out-of-range rows before the lm fit;
    # coord_flip(ylim = c(0, 1)) would zoom without dropping - confirm intent.
    scale_y_continuous(limits = c(0, 1.0), breaks = c(0, 0.5, 1)) +
    coord_flip() + # transpose
    theme(
      text = element_text(family = "Times"),
      plot.title = element_text(face = "bold")
    )
}
selfem_compare("North Dakota", "Utah")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 33 rows containing non-finite values (`stat_smooth()`).
## Removed 33 rows containing missing values (`geom_point()`).