TAS Descriptive statistics

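We will be going through the following steps: loading packages, importing and previewing the data, and comparing the 2005 and 2015 waves using regression, t-tests, and effect sizes.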

Step 1: Loading Packages

library(tidyverse)
library(readxl)
library(ggplot2)
library (reshape2)
library(writexl)
library(lme4)
library(dplyr)
library(ggpubr)
library(rstatix)
library(effectsize)
library(effsize)

Step 2: Import the data

# Read the TAS workbook; the path is relative to the working directory.
TAS_data_long_format_age <- read_excel("TAS_data_long_format_age.xlsx")

Step 3: Preview the data

# Open the data in the viewer, then print the first six rows.
view(TAS_data_long_format_age)
head(TAS_data_long_format_age)
## # A tibble: 6 × 42
##     TAS TAS05 TAS09 TAS15 `1968 Interview Number` `Person Number` Gender
##   <dbl> <dbl> <dbl> <dbl>                   <dbl>           <dbl>  <dbl>
## 1     2     1     1    NA                       4             180      2
## 2     2     1     1    NA                       5              32      2
## 3     2     1     1    NA                       6              34      1
## 4     2     1     1    NA                      14              30      1
## 5     1     1    NA    NA                      18              38      2
## 6     2     1     1    NA                      47              34      2
## # ℹ 35 more variables: `Individual is sample` <dbl>, `Year ID Number` <dbl>,
## #   `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## #   `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## #   C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## #   E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## #   G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## #   G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>, …

2005 & 2015

Step 4: Regression (2005 & 2015)

Filter the data (2005 & 2015)

# Keep the 2005 and 2015 waves, restricted to rows where Age_18_graduate is
# 18 or 19, and code year as -1 (2005) / 1 (2015) in `year_new`.
# The former `year == 2009 ~ 0` case could never match after the year filter,
# so it has been dropped.
Long_format_2005_2015_new <- TAS_data_long_format_age %>%
  filter(year %in% c(2005, 2015)) %>%
  filter(Age_18_graduate %in% c(18, 19)) %>%
  mutate(year_new = case_when(year == 2005 ~ -1, year == 2015 ~ 1))

# Preview the first rows of the filtered data (first 43 columns).
knitr::kable(head(Long_format_2005_2015_new[, 1:43]))
TAS TAS05 TAS09 TAS15 1968 Interview Number Person Number Gender Individual is sample Year ID Number Sequence Number Relationship to Head Release Number B5A B5D B6C C2D C2E C2F D2D3_month D2D3_year E1_1st_mention E1_2nd_mention E1_3rd_mention E3 G1 G2_month G2_year G10 G11 G30A G41A G41B G41C G41H G41P H1 L7_1st_mention L7_2nd_mention L7_3rd_mention Age_17_graduate Age_18_graduate year year_new
1 1 NA NA 18 38 2 2 5647 3 98 5 3 4 3 4 2 2 0 0 3 7 0 5 1 6 2004 1 5 6 5 5 5 7 5 2 1 0 0 18 19 2005 -1
2 1 1 NA 47 34 2 2 2516 3 30 5 4 5 6 4 5 2 0 0 1 0 0 0 1 5 2005 5 0 6 3 6 4 7 4 1 1 0 0 17 18 2005 -1
2 1 1 NA 53 36 2 2 1616 3 30 5 4 5 7 4 1 1 0 0 6 0 0 1 1 6 2005 1 5 7 7 7 5 7 6 2 1 0 0 17 18 2005 -1
2 1 1 NA 79 32 2 2 6520 2 30 5 3 4 6 7 5 3 0 0 1 7 0 0 1 5 2004 1 1 0 7 7 6 7 4 1 1 0 0 18 19 2005 -1
2 1 1 NA 88 35 1 2 3411 2 30 5 2 5 7 3 1 2 0 0 1 0 0 0 1 5 2005 1 1 7 2 6 5 6 7 2 1 0 0 17 18 2005 -1
2 1 1 NA 89 34 2 2 4527 3 30 5 2 4 5 2 3 1 0 0 3 7 0 5 1 5 2005 1 1 7 5 6 4 7 5 1 1 0 0 17 18 2005 -1
# Number of rows retained for each survey year.
count(Long_format_2005_2015_new, year)
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   348
## 2  2015   254
# Per-year subsets, used later for the Cohen's d calculations.
data_2005 <- filter(Long_format_2005_2015_new, year == 2005)
data_2015 <- filter(Long_format_2005_2015_new, year == 2015)

B5A: Responsibility for self

# Frequency of each B5A response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(B5A)
## # A tibble: 10 × 3
## # Groups:   year [2]
##     year   B5A     n
##    <dbl> <dbl> <int>
##  1  2005     1    14
##  2  2005     2    73
##  3  2005     3    91
##  4  2005     4   111
##  5  2005     5    59
##  6  2015     1     9
##  7  2015     2    52
##  8  2015     3    60
##  9  2015     4    80
## 10  2015     5    53
# Regress B5A on Age_18_graduate, year_new, and their interaction.
B5A_Regression_05_15 <- lm(
  B5A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
B5A_Regression_05_15
## 
## Call:
## lm(formula = B5A ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                -2.669620                  0.327591                 -0.117341  
## Age_18_graduate:year_new  
##                 0.007337
# Coefficient table, residual summary, and fit statistics for the B5A model.
summary(B5A_Regression_05_15)
## 
## Call:
## lm(formula = B5A ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5767 -0.5767  0.4233  0.7877  1.7877 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -2.669620   1.749622  -1.526 0.127582    
## Age_18_graduate           0.327591   0.094139   3.480 0.000538 ***
## year_new                 -0.117341   1.749622  -0.067 0.946551    
## Age_18_graduate:year_new  0.007337   0.094139   0.078 0.937899    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.113 on 598 degrees of freedom
## Multiple R-squared:  0.02196,    Adjusted R-squared:  0.01705 
## F-statistic: 4.476 on 3 and 598 DF,  p-value: 0.00405
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new$predicted_B5A <- predict(B5A_Regression_05_15)
Long_format_2005_2015_new$year_factor_B5A <- factor(Long_format_2005_2015_new$year)

# Observed B5A responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = B5A, color = year_factor_B5A)
) +
  geom_point(aes(shape = year_factor_B5A), alpha = 0.5) +
  geom_line(aes(y = predicted_B5A), linewidth = 1) +
  labs(
    title = "Responsibility for Self (B5A) by Age and Year",
    x = "Age",
    y = "Responsibility for Self (B5A)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Two-sample t-test of B5A by year, assuming equal variances.
t_test_B5A <- t.test(
  B5A ~ year_factor_B5A,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_B5A
## 
##  Two Sample t-test
## 
## data:  B5A by year_factor_B5A
## t = -0.95946, df = 600, p-value = 0.3377
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.2707990  0.0930454
## sample estimates:
## mean in group 2005 mean in group 2015 
##           3.367816           3.456693
# Mean B5A per survey year (missing values ignored).
mean_B5A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_B5A = mean(B5A, na.rm = TRUE)) %>%
  ungroup()
mean_B5A_0515
## # A tibble: 2 × 2
##    year average_B5A
##   <dbl>       <dbl>
## 1  2005        3.37
## 2  2015        3.46
# Standard deviation of B5A per survey year (missing values ignored).
sd_B5A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_B5A = sd(B5A, na.rm = TRUE)) %>%
  ungroup()
sd_B5A_0515
## # A tibble: 2 × 2
##    year sd_B5A
##   <dbl>  <dbl>
## 1  2005   1.11
## 2  2015   1.14
# Box plot of B5A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_B5A, y = B5A, fill = year_factor_B5A)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Responsibility for Self (B5A) in 2005 and 2015",
    x = "Year",
    y = "Responsibility for Self (B5A)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d for B5A, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_B5A_05_15 <- effectsize::cohens_d(
  data_2005$B5A,
  data_2015$B5A,
  paired = FALSE
)
effect_size_B5A_05_15
## Cohen's d |        95% CI
## -------------------------
## -0.08     | [-0.24, 0.08]
## 
## - Estimated using pooled SD.

B5D: Managing own money

# Frequency of each B5D response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(B5D)
## # A tibble: 10 × 3
## # Groups:   year [2]
##     year   B5D     n
##    <dbl> <dbl> <int>
##  1  2005     1     7
##  2  2005     2    14
##  3  2005     3    39
##  4  2005     4    89
##  5  2005     5   199
##  6  2015     1    10
##  7  2015     2    10
##  8  2015     3    22
##  9  2015     4    61
## 10  2015     5   151
# Regress B5D on Age_18_graduate, year_new, and their interaction.
B5D_Regression_05_15 <- lm(
  B5D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
B5D_Regression_05_15
## 
## Call:
## lm(formula = B5D ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                   0.4266                    0.2090                   -1.9583  
## Age_18_graduate:year_new  
##                   0.1044
# Coefficient table, residual summary, and fit statistics for the B5D model.
summary(B5D_Regression_05_15)
## 
## Call:
## lm(formula = B5D ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4233 -0.3728  0.5767  0.6272  0.8901 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)               0.42660    1.56617   0.272   0.7854  
## Age_18_graduate           0.20902    0.08427   2.480   0.0134 *
## year_new                 -1.95832    1.56617  -1.250   0.2116  
## Age_18_graduate:year_new  0.10440    0.08427   1.239   0.2159  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9961 on 598 degrees of freedom
## Multiple R-squared:  0.01116,    Adjusted R-squared:  0.006201 
## F-statistic:  2.25 on 3 and 598 DF,  p-value: 0.08146
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new$predicted_B5D <- predict(B5D_Regression_05_15)
Long_format_2005_2015_new$year_factor_B5D <- factor(Long_format_2005_2015_new$year)

# Observed B5D responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms;
# the redundant factor() around the already-factor year column is dropped.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = B5D, color = year_factor_B5D)
) +
  geom_point(aes(shape = year_factor_B5D), alpha = 0.5) +
  geom_line(aes(y = predicted_B5D), linewidth = 1) +
  labs(
    title = "Managing own money (B5D) by Age and Year",
    x = "Age",
    y = "Managing own money (B5D)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test of B5D by year, assuming equal variances.
t_test_B5D <- t.test(
  B5D ~ year_factor_B5D,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_B5D
## 
##  Two Sample t-test
## 
## data:  B5D by year_factor_B5D
## t = 0.096233, df = 600, p-value = 0.9234
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.1541366  0.1700204
## sample estimates:
## mean in group 2005 mean in group 2015 
##           4.318966           4.311024
# Mean B5D per survey year (missing values ignored).
mean_B5D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_B5D = mean(B5D, na.rm = TRUE)) %>%
  ungroup()
mean_B5D_0515
## # A tibble: 2 × 2
##    year average_B5D
##   <dbl>       <dbl>
## 1  2005        4.32
## 2  2015        4.31
# Standard deviation of B5D per survey year (missing values ignored).
sd_B5D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_B5D = sd(B5D, na.rm = TRUE)) %>%
  ungroup()
sd_B5D_0515
## # A tibble: 2 × 2
##    year sd_B5D
##   <dbl>  <dbl>
## 1  2005  0.963
## 2  2015  1.05
# Box plot of B5D by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_B5D, y = B5D, fill = year_factor_B5D)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Managing Own Money (B5D) in 2005 and 2015",
    x = "Year",
    y = "Managing Own Money (B5D)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d for B5D, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_B5D_05_15 <- effectsize::cohens_d(
  data_2005$B5D,
  data_2015$B5D,
  paired = FALSE
)
effect_size_B5D_05_15
## Cohen's d |        95% CI
## -------------------------
## 7.94e-03  | [-0.15, 0.17]
## 
## - Estimated using pooled SD.

B6C: Money management skills

# Frequency of each B6C response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(B6C)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   B6C     n
##    <dbl> <dbl> <int>
##  1  2005     1     4
##  2  2005     2    11
##  3  2005     3    23
##  4  2005     4    41
##  5  2005     5    98
##  6  2005     6    69
##  7  2005     7   102
##  8  2015     1     3
##  9  2015     2     8
## 10  2015     3    11
## 11  2015     4    36
## 12  2015     5    70
## 13  2015     6    65
## 14  2015     7    61
# Regress B6C on Age_18_graduate, year_new, and their interaction.
B6C_Regression_05_15 <- lm(
  B6C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
B6C_Regression_05_15
## 
## Call:
## lm(formula = B6C ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  6.50185                  -0.06079                  -1.55673  
## Age_18_graduate:year_new  
##                  0.08337
# Coefficient table, residual summary, and fit statistics for the B6C model.
summary(B6C_Regression_05_15)
## 
## Call:
## lm(formula = B6C ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4637 -0.4637 -0.3195  1.5363  1.6805 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               6.50185    2.22003   2.929  0.00353 **
## Age_18_graduate          -0.06079    0.11945  -0.509  0.61101   
## year_new                 -1.55673    2.22003  -0.701  0.48344   
## Age_18_graduate:year_new  0.08337    0.11945   0.698  0.48546   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.412 on 598 degrees of freedom
## Multiple R-squared:  0.001631,   Adjusted R-squared:  -0.003377 
## F-statistic: 0.3257 on 3 and 598 DF,  p-value: 0.8068
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new$predicted_B6C <- predict(B6C_Regression_05_15)
Long_format_2005_2015_new$year_factor_B6C <- factor(Long_format_2005_2015_new$year)

# Observed B6C responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms;
# the redundant factor() around the already-factor year column is dropped.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = B6C, color = year_factor_B6C)
) +
  geom_point(aes(shape = year_factor_B6C), alpha = 0.5) +
  geom_line(aes(y = predicted_B6C), linewidth = 1) +
  labs(
    title = "Money management skills (B6C) by Age and Year",
    x = "Age",
    y = "Money Management skills (B6C)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test of B6C by year, assuming equal variances.
t_test_B6C <- t.test(
  B6C ~ year_factor_B6C,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_B6C
## 
##  Two Sample t-test
## 
## data:  B6C by year_factor_B6C
## t = 0.23653, df = 600, p-value = 0.8131
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.2011003  0.2561732
## sample estimates:
## mean in group 2005 mean in group 2015 
##           5.393678           5.366142
# Mean B6C per survey year (missing values ignored).
mean_B6C_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_B6C = mean(B6C, na.rm = TRUE)) %>%
  ungroup()
mean_B6C_0515
## # A tibble: 2 × 2
##    year average_B6C
##   <dbl>       <dbl>
## 1  2005        5.39
## 2  2015        5.37
# Standard deviation of B6C per survey year (missing values ignored).
sd_B6C_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_B6C = sd(B6C, na.rm = TRUE)) %>%
  ungroup()
sd_B6C_0515
## # A tibble: 2 × 2
##    year sd_B6C
##   <dbl>  <dbl>
## 1  2005   1.44
## 2  2015   1.37
# Box plot of B6C by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_B6C, y = B6C, fill = year_factor_B6C)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Money management skills (B6C) in 2005 and 2015",
    x = "Year",
    y = "Money management skills (B6C)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d for B6C, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_B6C_05_15 <- effectsize::cohens_d(
  data_2005$B6C,
  data_2015$B6C,
  paired = FALSE
)
effect_size_B6C_05_15
## Cohen's d |        95% CI
## -------------------------
## 0.02      | [-0.14, 0.18]
## 
## - Estimated using pooled SD.

C2D: Worry about expenses

# Frequency of each C2D response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(C2D)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2D     n
##    <dbl> <dbl> <int>
##  1  2005     1    54
##  2  2005     2    52
##  3  2005     3    57
##  4  2005     4    66
##  5  2005     5    49
##  6  2005     6    34
##  7  2005     7    36
##  8  2015     1    48
##  9  2015     2    39
## 10  2015     3    48
## 11  2015     4    43
## 12  2015     5    37
## 13  2015     6    17
## 14  2015     7    22
# Regress C2D on Age_18_graduate, year_new, and their interaction.
C2D_Regression_05_15 <- lm(
  C2D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
C2D_Regression_05_15
## 
## Call:
## lm(formula = C2D ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  0.97875                   0.14098                  -0.48721  
## Age_18_graduate:year_new  
##                  0.01913
# Coefficient table, residual summary, and fit statistics for the C2D model.
summary(C2D_Regression_05_15)
## 
## Call:
## lm(formula = C2D ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7811 -1.6592  0.2189  1.3408  3.6264 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)               0.97875    2.94572   0.332    0.740
## Age_18_graduate           0.14098    0.15850   0.890    0.374
## year_new                 -0.48721    2.94572  -0.165    0.869
## Age_18_graduate:year_new  0.01913    0.15850   0.121    0.904
## 
## Residual standard error: 1.873 on 598 degrees of freedom
## Multiple R-squared:  0.005396,   Adjusted R-squared:  0.0004064 
## F-statistic: 1.081 on 3 and 598 DF,  p-value: 0.3563
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new$predicted_C2D <- predict(C2D_Regression_05_15)
Long_format_2005_2015_new$year_factor_C2D <- factor(Long_format_2005_2015_new$year)

# Observed C2D responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms;
# the redundant factor() around the already-factor year column is dropped.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = C2D, color = year_factor_C2D)
) +
  geom_point(aes(shape = year_factor_C2D), alpha = 0.5) +
  geom_line(aes(y = predicted_C2D), linewidth = 1) +
  labs(
    title = "Worry about expenses (C2D) by Age and Year",
    x = "Age",
    y = "Worry about expenses (C2D)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test of C2D by year, assuming equal variances.
t_test_C2D <- t.test(
  C2D ~ year_factor_C2D,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_C2D
## 
##  Two Sample t-test
## 
## data:  C2D by year_factor_C2D
## t = 1.5669, df = 600, p-value = 0.1177
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.06132932  0.54535503
## sample estimates:
## mean in group 2005 mean in group 2015 
##           3.718391           3.476378
# Mean C2D per survey year (missing values ignored).
mean_C2D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_C2D = mean(C2D, na.rm = TRUE)) %>%
  ungroup()
mean_C2D_0515
## # A tibble: 2 × 2
##    year average_C2D
##   <dbl>       <dbl>
## 1  2005        3.72
## 2  2015        3.48
# Standard deviation of C2D per survey year (missing values ignored).
sd_C2D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_C2D = sd(C2D, na.rm = TRUE)) %>%
  ungroup()
sd_C2D_0515
## # A tibble: 2 × 2
##    year sd_C2D
##   <dbl>  <dbl>
## 1  2005   1.88
## 2  2015   1.85
# Box plot of C2D by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_C2D, y = C2D, fill = year_factor_C2D)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Worry about Expenses (C2D) in 2005 and 2015",
    x = "Year",
    y = "Worry about Expenses (C2D)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d for C2D, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_C2D_05_15 <- effectsize::cohens_d(
  data_2005$C2D,
  data_2015$C2D,
  paired = FALSE
)
effect_size_C2D_05_15
## Cohen's d |        95% CI
## -------------------------
## 0.13      | [-0.03, 0.29]
## 
## - Estimated using pooled SD.

C2E: Worry about future job

# Frequency of each C2E response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(C2E)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2E     n
##    <dbl> <dbl> <int>
##  1  2005     1    64
##  2  2005     2    65
##  3  2005     3    57
##  4  2005     4    41
##  5  2005     5    45
##  6  2005     6    45
##  7  2005     7    31
##  8  2015     1    46
##  9  2015     2    42
## 10  2015     3    39
## 11  2015     4    45
## 12  2015     5    29
## 13  2015     6    21
## 14  2015     7    32
# Regress C2E on Age_18_graduate, year_new, and their interaction.
C2E_Regression_05_15 <- lm(
  C2E ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
C2E_Regression_05_15
## 
## Call:
## lm(formula = C2E ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                 -1.59475                   0.27944                  -1.26280  
## Age_18_graduate:year_new  
##                  0.06857
# Coefficient table, residual summary, and fit statistics for the C2E model.
summary(C2E_Regression_05_15)
## 
## Call:
## lm(formula = C2E ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7546 -1.6746 -0.4066  1.5363  3.5934 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)              -1.59475    3.08875  -0.516   0.6058  
## Age_18_graduate           0.27944    0.16619   1.681   0.0932 .
## year_new                 -1.26280    3.08875  -0.409   0.6828  
## Age_18_graduate:year_new  0.06857    0.16619   0.413   0.6801  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.964 on 598 degrees of freedom
## Multiple R-squared:  0.004974,   Adjusted R-squared:  -1.781e-05 
## F-statistic: 0.9964 on 3 and 598 DF,  p-value: 0.394
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new$predicted_C2E <- predict(C2E_Regression_05_15)
Long_format_2005_2015_new$year_factor_C2E <- factor(Long_format_2005_2015_new$year)

# Observed C2E responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms;
# the redundant factor() around the already-factor year column is dropped.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = C2E, color = year_factor_C2E)
) +
  geom_point(aes(shape = year_factor_C2E), alpha = 0.5) +
  geom_line(aes(y = predicted_C2E), linewidth = 1) +
  labs(
    title = "Worry about future job (C2E) by Age and Year",
    x = "Age",
    y = "Worry about future job (C2E)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test of C2E by year, assuming equal variances.
t_test_C2E <- t.test(
  C2E ~ year_factor_C2E,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_C2E
## 
##  Two Sample t-test
## 
## data:  C2E by year_factor_C2E
## t = -0.39344, df = 600, p-value = 0.6941
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.3824418  0.2547832
## sample estimates:
## mean in group 2005 mean in group 2015 
##           3.566092           3.629921
# Mean C2E per survey year (missing values ignored).
mean_C2E_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_C2E = mean(C2E, na.rm = TRUE)) %>%
  ungroup()
mean_C2E_0515
## # A tibble: 2 × 2
##    year average_C2E
##   <dbl>       <dbl>
## 1  2005        3.57
## 2  2015        3.63
# Standard deviation of C2E per survey year (missing values ignored).
sd_C2E_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_C2E = sd(C2E, na.rm = TRUE)) %>%
  ungroup()
sd_C2E_0515
## # A tibble: 2 × 2
##    year sd_C2E
##   <dbl>  <dbl>
## 1  2005   1.96
## 2  2015   1.98
# Box plot of C2E by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_C2E, y = C2E, fill = year_factor_C2E)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Worry about Future Job (C2E) in 2005 and 2015",
    x = "Year",
    y = "Worry about Future Job (C2E)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d for C2E, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_C2E_05_15 <- effectsize::cohens_d(
  data_2005$C2E,
  data_2015$C2E,
  paired = FALSE
)
effect_size_C2E_05_15
## Cohen's d |        95% CI
## -------------------------
## -0.03     | [-0.19, 0.13]
## 
## - Estimated using pooled SD.

C2F: Discouraged about future

# Frequency of each C2F response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(C2F)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2F     n
##    <dbl> <dbl> <int>
##  1  2005     1    82
##  2  2005     2    84
##  3  2005     3    56
##  4  2005     4    52
##  5  2005     5    38
##  6  2005     6    18
##  7  2005     7    18
##  8  2015     1    58
##  9  2015     2    57
## 10  2015     3    47
## 11  2015     4    36
## 12  2015     5    28
## 13  2015     6    17
## 14  2015     7    11
# Regress C2F on Age_18_graduate, year_new, and their interaction.
C2F_Regression_05_15 <- lm(
  C2F ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
C2F_Regression_05_15
## 
## Call:
## lm(formula = C2F ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  1.42792                   0.08599                  -2.84692  
## Age_18_graduate:year_new  
##                  0.15402
# Coefficient table, residual summary, and fit statistics for the C2F model.
summary(C2F_Regression_05_15)
## 
## Call:
## lm(formula = C2F ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1411 -1.1411 -0.1411  1.0178  4.0989 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)               1.42792    2.74853   0.520    0.604
## Age_18_graduate           0.08599    0.14789   0.581    0.561
## year_new                 -2.84692    2.74853  -1.036    0.301
## Age_18_graduate:year_new  0.15402    0.14789   1.041    0.298
## 
## Residual standard error: 1.748 on 598 degrees of freedom
## Multiple R-squared:  0.002172,   Adjusted R-squared:  -0.002834 
## F-statistic: 0.4338 on 3 and 598 DF,  p-value: 0.7289
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new$predicted_C2F <- predict(C2F_Regression_05_15)
Long_format_2005_2015_new$year_factor_C2F <- factor(Long_format_2005_2015_new$year)

# Observed C2F responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms;
# the redundant factor() around the already-factor year column is dropped.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = C2F, color = year_factor_C2F)
) +
  geom_point(aes(shape = year_factor_C2F), alpha = 0.5) +
  geom_line(aes(y = predicted_C2F), linewidth = 1) +
  labs(
    title = "Discouraged about future (C2F) by Age and Year",
    x = "Age",
    y = "Discouraged about future (C2F)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test of C2F by year, assuming equal variances.
t_test_C2F <- t.test(
  C2F ~ year_factor_C2F,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_C2F
## 
##  Two Sample t-test
## 
## data:  C2F by year_factor_C2F
## t = -0.26272, df = 600, p-value = 0.7929
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.3210160  0.2452626
## sample estimates:
## mean in group 2005 mean in group 2015 
##           3.017241           3.055118
# Mean C2F per survey year (missing values ignored).
mean_C2F_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_C2F = mean(C2F, na.rm = TRUE)) %>%
  ungroup()
mean_C2F_0515
## # A tibble: 2 × 2
##    year average_C2F
##   <dbl>       <dbl>
## 1  2005        3.02
## 2  2015        3.06
# Standard deviation of C2F per survey year (missing values ignored).
sd_C2F_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_C2F = sd(C2F, na.rm = TRUE)) %>%
  ungroup()
sd_C2F_0515
## # A tibble: 2 × 2
##    year sd_C2F
##   <dbl>  <dbl>
## 1  2005   1.75
## 2  2015   1.74
# Box plot of C2F by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_C2F, y = C2F, fill = year_factor_C2F)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Discouraged about future (C2F) in 2005 and 2015",
    x = "Year",
    y = "Discouraged about future (C2F)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d for C2F, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_C2F_05_15 <- effectsize::cohens_d(
  data_2005$C2F,
  data_2015$C2F,
  paired = FALSE
)
effect_size_C2F_05_15
## Cohen's d |        95% CI
## -------------------------
## -0.02     | [-0.18, 0.14]
## 
## - Estimated using pooled SD.

G30A: Likelihood of well-paying job (SPECIAL)

# Frequency of each G30A response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G30A)
## # A tibble: 12 × 3
## # Groups:   year [2]
##     year  G30A     n
##    <dbl> <dbl> <int>
##  1  2005     0    35
##  2  2005     1     1
##  3  2005     3     2
##  4  2005     4    15
##  5  2005     5    61
##  6  2005     6   114
##  7  2005     7   120
##  8  2015     3     2
##  9  2015     4    13
## 10  2015     5    48
## 11  2015     6    88
## 12  2015     7   103
# Keep only rows with G30A above 0, then recount rows per survey year.
Long_format_2005_2015_new_G30A <- Long_format_2005_2015_new %>%
  filter(G30A > 0)
count(Long_format_2005_2015_new_G30A, year)
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   313
## 2  2015   254
# Regress G30A on Age_18_graduate, year_new, and their interaction,
# using the data restricted to G30A above 0.
G30A_Regression_05_15 <- lm(
  G30A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new_G30A
)
G30A_Regression_05_15
## 
## Call:
## lm(formula = G30A ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new_G30A)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  5.00387                   0.05770                   0.37158  
## Age_18_graduate:year_new  
##                 -0.01934
# Coefficient table, residual summary, and fit statistics for the G30A model.
summary(G30A_Regression_05_15)
## 
## Call:
## lm(formula = G30A ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new_G30A)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0191 -0.5617 -0.0659  0.9038  0.9809 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               5.00387    1.51653   3.300  0.00103 **
## Age_18_graduate           0.05770    0.08159   0.707  0.47972   
## year_new                  0.37158    1.51653   0.245  0.80653   
## Age_18_graduate:year_new -0.01934    0.08159  -0.237  0.81269   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9436 on 563 degrees of freedom
## Multiple R-squared:  0.001401,   Adjusted R-squared:  -0.00392 
## F-statistic: 0.2634 on 3 and 563 DF,  p-value: 0.8518
# Store the model's fitted values and a categorical copy of year for plotting.
Long_format_2005_2015_new_G30A$predicted_G30A <- predict(G30A_Regression_05_15)
Long_format_2005_2015_new_G30A$year_factor_G30A <- factor(Long_format_2005_2015_new_G30A$year)

# Observed G30A responses (points) with the fitted line for each year.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms;
# the redundant factor() around the already-factor year column is dropped.
ggplot(
  Long_format_2005_2015_new_G30A,
  aes(x = Age_18_graduate, y = G30A, color = year_factor_G30A)
) +
  geom_point(aes(shape = year_factor_G30A), alpha = 0.5) +
  geom_line(aes(y = predicted_G30A), linewidth = 1) +
  labs(
    title = "Likelihood of well-paying job (G30A) by Age and Year",
    x = "Age",
    y = "Likelihood of well-paying job (G30A)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test of G30A by year, assuming equal variances.
t_test_G30A <- t.test(
  G30A ~ year_factor_G30A,
  data = Long_format_2005_2015_new_G30A,
  var.equal = TRUE
)
t_test_G30A
## 
##  Two Sample t-test
## 
## data:  G30A by year_factor_G30A
## t = -0.41515, df = 565, p-value = 0.6782
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.1893790  0.1232927
## sample estimates:
## mean in group 2005 mean in group 2015 
##           6.057508           6.090551
# Mean G30A per survey year (missing values ignored).
mean_G30A_0515 <- Long_format_2005_2015_new_G30A %>%
  group_by(year) %>%
  summarize(average_G30A = mean(G30A, na.rm = TRUE)) %>%
  ungroup()
mean_G30A_0515
## # A tibble: 2 × 2
##    year average_G30A
##   <dbl>        <dbl>
## 1  2005         6.06
## 2  2015         6.09
# Standard deviation of G30A per survey year (missing values ignored).
sd_G30A_0515 <- Long_format_2005_2015_new_G30A %>%
  group_by(year) %>%
  summarize(sd_G30A = sd(G30A, na.rm = TRUE)) %>%
  ungroup()
sd_G30A_0515
## # A tibble: 2 × 2
##    year sd_G30A
##   <dbl>   <dbl>
## 1  2005   0.952
## 2  2015   0.930
# Box plot of G30A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2015_new_G30A,
  aes(x = year_factor_G30A, y = G30A, fill = year_factor_G30A)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Likelihood of well-paying job (G30A) in 2005 and 2015",
    x = "Year",
    y = "Likelihood of well-paying job (G30A)",
    fill = "Year"
  ) +
  theme_minimal()

# Per-year subsets restricted to G30A above 0.
data_2005_G30A <- Long_format_2005_2015_new %>%
  filter(year == 2005, G30A > 0)
data_2015_G30A <- Long_format_2005_2015_new %>%
  filter(year == 2015, G30A > 0)

# Cohen's d for G30A, 2005 vs 2015 (independent samples).
# rstatix and effectsize both export cohens_d(); the call is qualified so it
# no longer depends on the order in which the two packages were attached.
effect_size_G30A_05_15 <- effectsize::cohens_d(
  data_2005_G30A$G30A,
  data_2015_G30A$G30A,
  paired = FALSE
)
effect_size_G30A_05_15
## Cohen's d |        95% CI
## -------------------------
## -0.04     | [-0.20, 0.13]
## 
## - Estimated using pooled SD.

G41A: Importance of job status

# Frequency of each G41A response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41A)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41A     n
##    <dbl> <dbl> <int>
##  1  2005     1    12
##  2  2005     2    15
##  3  2005     3    26
##  4  2005     4    29
##  5  2005     5    81
##  6  2005     6    77
##  7  2005     7   108
##  8  2015     1    15
##  9  2015     2    15
## 10  2015     3    22
## 11  2015     4    39
## 12  2015     5    58
## 13  2015     6    54
## 14  2015     7    51
# Linear model of G41A on age, the year code (year_new), and their
# interaction.
G41A_Regression_05_15 <- lm(
  G41A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
G41A_Regression_05_15
## 
## Call:
## lm(formula = G41A ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                   8.0868                   -0.1593                    4.9160  
## Age_18_graduate:year_new  
##                  -0.2768
# Coefficient table and fit statistics for the G41A model.
G41A_Regression_05_15 %>%
  summary()
## 
## Call:
## lm(formula = G41A ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4024 -0.7178  0.2822  1.5976  2.2822 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                8.0868     2.6291   3.076  0.00219 **
## Age_18_graduate           -0.1593     0.1415  -1.126  0.26056   
## year_new                   4.9160     2.6291   1.870  0.06199 . 
## Age_18_graduate:year_new  -0.2768     0.1415  -1.956  0.05088 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.672 on 598 degrees of freedom
## Multiple R-squared:  0.0259, Adjusted R-squared:  0.02101 
## F-statistic:   5.3 on 3 and 598 DF,  p-value: 0.001303
# Attach the G41A model's fitted values and a factor copy of year (used for
# the discrete colour/shape scales and as the t-test grouping variable).
Long_format_2005_2015_new$predicted_G41A <- predict(G41A_Regression_05_15)
Long_format_2005_2015_new$year_factor_G41A <-
  factor(Long_format_2005_2015_new$year)
# Observed G41A by age, with the fitted line for each year.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = G41A, color = factor(year_factor_G41A))
) +
  geom_point(aes(shape = year_factor_G41A), alpha = 0.5) +
  geom_line(aes(y = predicted_G41A), size = 1) +
  labs(
    title = "Importance of job status (G41A) by Age and Year",
    x = "Age",
    y = "Importance of job status (G41A)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Pooled-variance two-sample t-test of G41A between the two survey years.
t_test_G41A <- t.test(
  G41A ~ year_factor_G41A,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_G41A
## 
##  Two Sample t-test
## 
## data:  G41A by year_factor_G41A
## t = 3.3843, df = 600, p-value = 0.0007602
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  0.1963895 0.7394870
## sample estimates:
## mean in group 2005 mean in group 2015 
##           5.341954           4.874016
# Mean G41A per survey year (missing values ignored).
mean_G41A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_G41A = mean(G41A, na.rm = TRUE)) %>%
  ungroup()
mean_G41A_0515
## # A tibble: 2 × 2
##    year average_G41A
##   <dbl>        <dbl>
## 1  2005         5.34
## 2  2015         4.87
# Standard deviation of G41A per survey year (missing values ignored).
sd_G41A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_G41A = sd(G41A, na.rm = TRUE)) %>%
  ungroup()
sd_G41A_0515
## # A tibble: 2 × 2
##    year sd_G41A
##   <dbl>   <dbl>
## 1  2005    1.64
## 2  2015    1.73
# Box plot of G41A by year; the dashed crossbar marks each year's mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_G41A, y = G41A, fill = year_factor_G41A)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of job status (G41A) in 2005 and 2015",
    x = "Year",
    y = "Importance of job status (G41A)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d (independent samples) for G41A.
# NOTE(review): data_2005 / data_2015 are defined outside this section;
# presumably the 2005 and 2015 subsets -- confirm.
# Namespace-qualified because rstatix also exports cohens_d().
effect_size_G41A_05_15 <- effectsize::cohens_d(
  data_2005$G41A,
  data_2015$G41A,
  paired = FALSE
)
effect_size_G41A_05_15
## Cohen's d |       95% CI
## ------------------------
## 0.28      | [0.12, 0.44]
## 
## - Estimated using pooled SD.

G41B: Importance of decision-making

# Frequency of each G41B response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41B)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41B     n
##    <dbl> <dbl> <int>
##  1  2005     1     2
##  2  2005     2     1
##  3  2005     3    12
##  4  2005     4    21
##  5  2005     5    97
##  6  2005     6   124
##  7  2005     7    91
##  8  2015     1     4
##  9  2015     2     5
## 10  2015     3     7
## 11  2015     4    21
## 12  2015     5    82
## 13  2015     6    61
## 14  2015     7    74
# Linear model of G41B on age, the year code (year_new), and their
# interaction.
G41B_Regression_05_15 <- lm(
  G41B ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
G41B_Regression_05_15
## 
## Call:
## lm(formula = G41B ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                 5.753809                 -0.005847                  1.011916  
## Age_18_graduate:year_new  
##                -0.058672
# Coefficient table and fit statistics for the G41B model.
G41B_Regression_05_15 %>%
  summary()
## 
## Call:
## lm(formula = G41B ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7456 -0.6927  0.2544  1.2544  1.4601 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               5.753809   1.877209   3.065  0.00227 **
## Age_18_graduate          -0.005847   0.101004  -0.058  0.95386   
## year_new                  1.011916   1.877209   0.539  0.59005   
## Age_18_graduate:year_new -0.058672   0.101004  -0.581  0.56154   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.194 on 598 degrees of freedom
## Multiple R-squared:  0.004707,   Adjusted R-squared:  -0.0002859 
## F-statistic: 0.9427 on 3 and 598 DF,  p-value: 0.4196
# Fitted values must come from the G41B model (not the G41A model) so that
# the regression lines drawn below belong to the outcome being plotted.
Long_format_2005_2015_new$predicted_G41B <- predict(G41B_Regression_05_15)
# Factor copy of year: gives discrete colour/shape scales in the plot and is
# the grouping variable for the t-test that follows.
Long_format_2005_2015_new$year_factor_G41B <-
  factor(Long_format_2005_2015_new$year)
# Observed G41B by age, with the G41B model's fitted line for each year.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = G41B, color = factor(year_factor_G41B))
) +
  geom_point(aes(shape = year_factor_G41B), alpha = 0.5) +
  geom_line(aes(y = predicted_G41B), size = 1) +
  labs(
    title = "Importance of decision-making (G41B) by Age and Year",
    x = "Age",
    y = "Importance of decision-making (G41B)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Pooled-variance two-sample t-test of G41B between the two survey years.
t_test_G41B <- t.test(
  G41B ~ year_factor_G41B,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_G41B
## 
##  Two Sample t-test
## 
## data:  G41B by year_factor_G41B
## t = 1.5794, df = 600, p-value = 0.1148
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.03783767  0.34863502
## sample estimates:
## mean in group 2005 mean in group 2015 
##           5.718391           5.562992
# Mean G41B per survey year (missing values ignored).
mean_G41B_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_G41B = mean(G41B, na.rm = TRUE)) %>%
  ungroup()
mean_G41B_0515
## # A tibble: 2 × 2
##    year average_G41B
##   <dbl>        <dbl>
## 1  2005         5.72
## 2  2015         5.56
# Standard deviation of G41B per survey year (missing values ignored).
sd_G41B_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_G41B = sd(G41B, na.rm = TRUE)) %>%
  ungroup()
sd_G41B_0515
## # A tibble: 2 × 2
##    year sd_G41B
##   <dbl>   <dbl>
## 1  2005    1.10
## 2  2015    1.31
# Box plot of G41B by year; the dashed crossbar marks each year's mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_G41B, y = G41B, fill = year_factor_G41B)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of decision-making (G41B) in 2005 and 2015",
    x = "Year",
    y = "Importance of decision-making (G41B)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d (independent samples) for G41B.
# NOTE(review): data_2005 / data_2015 are defined outside this section;
# presumably the 2005 and 2015 subsets -- confirm.
# Namespace-qualified because rstatix also exports cohens_d().
effect_size_G41B_05_15 <- effectsize::cohens_d(
  data_2005$G41B,
  data_2015$G41B,
  paired = FALSE
)
effect_size_G41B_05_15
## Cohen's d |        95% CI
## -------------------------
## 0.13      | [-0.03, 0.29]
## 
## - Estimated using pooled SD.

G41C: Importance of challenging work (SPECIAL)

# Frequency of each G41C response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41C)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41C     n
##    <dbl> <dbl> <int>
##  1  2005     2     4
##  2  2005     3     8
##  3  2005     4    50
##  4  2005     5   102
##  5  2005     6   109
##  6  2005     7    75
##  7  2015     0     1
##  8  2015     1     2
##  9  2015     2     3
## 10  2015     3    14
## 11  2015     4    32
## 12  2015     5    76
## 13  2015     6    66
## 14  2015     7    60
# Keep only rows with G41C > 0 (the tabulation above shows one 2015 row
# coded 0; presumably a non-response code -- confirm against the codebook).
Long_format_2005_2015_new_G41C <- filter(Long_format_2005_2015_new, G41C > 0)
# Remaining sample size per year.
count(Long_format_2005_2015_new_G41C, year)
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   348
## 2  2015   253
# Linear model of G41C on age, the year code (year_new), and their
# interaction, fitted to the G41C-filtered data.
G41C_Regression_05_15 <- lm(
  G41C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new_G41C
)
G41C_Regression_05_15
## 
## Call:
## lm(formula = G41C ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new_G41C)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  2.82459                   0.14303                   0.94098  
## Age_18_graduate:year_new  
##                 -0.05369
# Coefficient table and fit statistics for the G41C model.
G41C_Regression_05_15 %>%
  summary()
## 
## Call:
## lm(formula = G41C ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new_G41C)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4630 -0.6213  0.3787  0.6264  1.6264 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)               2.82459    1.86073   1.518    0.130
## Age_18_graduate           0.14303    0.10012   1.429    0.154
## year_new                  0.94098    1.86073   0.506    0.613
## Age_18_graduate:year_new -0.05369    0.10012  -0.536    0.592
## 
## Residual standard error: 1.183 on 597 degrees of freedom
## Multiple R-squared:  0.005948,   Adjusted R-squared:  0.000953 
## F-statistic: 1.191 on 3 and 597 DF,  p-value: 0.3125
# Attach the G41C model's fitted values and a factor copy of year (used for
# the discrete colour/shape scales and as the t-test grouping variable).
Long_format_2005_2015_new_G41C$predicted_G41C <- predict(G41C_Regression_05_15)
Long_format_2005_2015_new_G41C$year_factor_G41C <-
  factor(Long_format_2005_2015_new_G41C$year)
# Observed G41C by age, with the fitted line for each year.
ggplot(
  Long_format_2005_2015_new_G41C,
  aes(x = Age_18_graduate, y = G41C, color = factor(year_factor_G41C))
) +
  geom_point(aes(shape = year_factor_G41C), alpha = 0.5) +
  geom_line(aes(y = predicted_G41C), size = 1) +
  labs(
    title = "Importance of challenging work (G41C) by Age and Year",
    x = "Age",
    y = "Importance of challenging work (G41C)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Pooled-variance two-sample t-test of G41C between the two survey years.
t_test_G41C <- t.test(
  G41C ~ year_factor_G41C,
  data = Long_format_2005_2015_new_G41C,
  var.equal = TRUE
)
t_test_G41C
## 
##  Two Sample t-test
## 
## data:  G41C by year_factor_G41C
## t = 0.91317, df = 599, p-value = 0.3615
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.1027390  0.2813088
## sample estimates:
## mean in group 2005 mean in group 2015 
##           5.520115           5.430830
# Mean G41C per survey year (missing values ignored).
mean_G41C_0515 <- Long_format_2005_2015_new_G41C %>%
  group_by(year) %>%
  summarise(average_G41C = mean(G41C, na.rm = TRUE)) %>%
  ungroup()
mean_G41C_0515
## # A tibble: 2 × 2
##    year average_G41C
##   <dbl>        <dbl>
## 1  2005         5.52
## 2  2015         5.43
# Standard deviation of G41C per survey year (missing values ignored).
sd_G41C_0515 <- Long_format_2005_2015_new_G41C %>%
  group_by(year) %>%
  summarise(sd_G41C = sd(G41C, na.rm = TRUE)) %>%
  ungroup()
sd_G41C_0515
## # A tibble: 2 × 2
##    year sd_G41C
##   <dbl>   <dbl>
## 1  2005    1.12
## 2  2015    1.27
# Box plot of G41C by year; the dashed crossbar marks each year's mean.
ggplot(
  Long_format_2005_2015_new_G41C,
  aes(x = year_factor_G41C, y = G41C, fill = year_factor_G41C)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of challenging work (G41C) in 2005 and 2015",
    x = "Year",
    y = "Importance of challenging work (G41C)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d (independent samples) for G41C, 2005 vs 2015, keeping only
# responses with G41C > 0.
data_2005_G41C <- Long_format_2005_2015_new %>%
  filter(year == 2005, G41C > 0)
data_2015_G41C <- Long_format_2005_2015_new %>%
  filter(year == 2015, G41C > 0)
# Namespace-qualified because rstatix also exports cohens_d().
effect_size_G41C_05_15 <- effectsize::cohens_d(
  data_2005_G41C$G41C,
  data_2015_G41C$G41C,
  paired = FALSE
)
effect_size_G41C_05_15
## Cohen's d |        95% CI
## -------------------------
## 0.08      | [-0.09, 0.24]
## 
## - Estimated using pooled SD.

G41H: Importance of healthcare benefits

# Frequency of each G41H response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41H)
## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41H     n
##    <dbl> <dbl> <int>
##  1  2005     1     1
##  2  2005     2     2
##  3  2005     3     4
##  4  2005     4     6
##  5  2005     5    37
##  6  2005     6    90
##  7  2005     7   208
##  8  2015     1     1
##  9  2015     2     1
## 10  2015     3     3
## 11  2015     4    10
## 12  2015     5    40
## 13  2015     6    64
## 14  2015     7   135
# Linear model of G41H on age, the year code (year_new), and their
# interaction.
G41H_Regression_05_15 <- lm(
  G41H ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
G41H_Regression_05_15
## 
## Call:
## lm(formula = G41H ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  5.07656                   0.06684                   2.92870  
## Age_18_graduate:year_new  
##                 -0.16237
# Coefficient table and fit statistics for the G41H model.
G41H_Regression_05_15 %>%
  summary()
## 
## Call:
## lm(formula = G41H ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2737 -0.2857  0.4970  0.7263  0.8098 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               5.07656    1.54247   3.291  0.00106 **
## Age_18_graduate           0.06684    0.08299   0.805  0.42091   
## year_new                  2.92870    1.54247   1.899  0.05808 . 
## Age_18_graduate:year_new -0.16237    0.08299  -1.956  0.05087 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.981 on 598 degrees of freedom
## Multiple R-squared:  0.01521,    Adjusted R-squared:  0.01027 
## F-statistic: 3.079 on 3 and 598 DF,  p-value: 0.02707
# Attach the G41H model's fitted values and a factor copy of year (used for
# the discrete colour/shape scales and as the t-test grouping variable).
Long_format_2005_2015_new$predicted_G41H <- predict(G41H_Regression_05_15)
Long_format_2005_2015_new$year_factor_G41H <-
  factor(Long_format_2005_2015_new$year)
# Observed G41H by age, with the fitted line for each year.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = G41H, color = factor(year_factor_G41H))
) +
  geom_point(aes(shape = year_factor_G41H), alpha = 0.5) +
  geom_line(aes(y = predicted_G41H), size = 1) +
  labs(
    title = "Importance of healthcare benefits (G41H) by Age and Year",
    x = "Age",
    y = "Importance of healthcare benefits (G41H)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Pooled-variance two-sample t-test of G41H between the two survey years.
t_test_G41H <- t.test(
  G41H ~ year_factor_G41H,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_G41H
## 
##  Two Sample t-test
## 
## data:  G41H by year_factor_G41H
## t = 1.9789, df = 600, p-value = 0.04829
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  0.001212977 0.320083068
## sample estimates:
## mean in group 2005 mean in group 2015 
##           6.385057           6.224409
# Mean G41H per survey year (missing values ignored).
mean_G41H_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_G41H = mean(G41H, na.rm = TRUE)) %>%
  ungroup()
mean_G41H_0515
## # A tibble: 2 × 2
##    year average_G41H
##   <dbl>        <dbl>
## 1  2005         6.39
## 2  2015         6.22
# Standard deviation of G41H per survey year (missing values ignored).
sd_G41H_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_G41H = sd(G41H, na.rm = TRUE)) %>%
  ungroup()
sd_G41H_0515
## # A tibble: 2 × 2
##    year sd_G41H
##   <dbl>   <dbl>
## 1  2005   0.946
## 2  2015   1.03
# Box plot of G41H by year; the dashed crossbar marks each year's mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_G41H, y = G41H, fill = year_factor_G41H)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of healthcare benefits (G41H) in 2005 and 2015",
    x = "Year",
    y = "Importance of healthcare benefits (G41H)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d (independent samples) for G41H.
# NOTE(review): data_2005 / data_2015 are defined outside this section;
# presumably the 2005 and 2015 subsets -- confirm.
# Namespace-qualified because rstatix also exports cohens_d().
effect_size_G41H_05_15 <- effectsize::cohens_d(
  data_2005$G41H,
  data_2015$G41H,
  paired = FALSE
)
effect_size_G41H_05_15
## Cohen's d |       95% CI
## ------------------------
## 0.16      | [0.00, 0.33]
## 
## - Estimated using pooled SD.

G41P: Importance of job central to identity (SPECIAL)

# Frequency of each G41P response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41P)
## # A tibble: 15 × 3
## # Groups:   year [2]
##     year  G41P     n
##    <dbl> <dbl> <int>
##  1  2005     1    11
##  2  2005     2    13
##  3  2005     3    32
##  4  2005     4    57
##  5  2005     5    92
##  6  2005     6    76
##  7  2005     7    66
##  8  2005     9     1
##  9  2015     1    11
## 10  2015     2    18
## 11  2015     3    15
## 12  2015     4    47
## 13  2015     5    64
## 14  2015     6    49
## 15  2015     7    50
# Keep only rows with G41P < 9 (the tabulation above shows one 2005 row
# coded 9; presumably a non-response code -- confirm against the codebook).
Long_format_2005_2015_new_G41P <- filter(Long_format_2005_2015_new, G41P < 9)
# Remaining sample size per year.
count(Long_format_2005_2015_new_G41P, year)
## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   347
## 2  2015   254
# Linear model of G41P on age, the year code (year_new), and their
# interaction, fitted to the G41P-filtered data.
G41P_Regression_05_15 <- lm(
  G41P ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new_G41P
)
G41P_Regression_05_15
## 
## Call:
## lm(formula = G41P ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new_G41P)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                 5.101890                 -0.007362                  2.450055  
## Age_18_graduate:year_new  
##                -0.135023
# Coefficient table and fit statistics for the G41P model.
G41P_Regression_05_15 %>%
  summary()
## 
## Call:
## lm(formula = G41P ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new_G41P)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0774 -0.9497  0.0503  1.0503  2.1534 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)               5.101890   2.499282   2.041   0.0417 *
## Age_18_graduate          -0.007362   0.134480  -0.055   0.9564  
## year_new                  2.450055   2.499282   0.980   0.3273  
## Age_18_graduate:year_new -0.135023   0.134480  -1.004   0.3158  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.589 on 597 degrees of freedom
## Multiple R-squared:  0.002977,   Adjusted R-squared:  -0.002033 
## F-statistic: 0.5941 on 3 and 597 DF,  p-value: 0.619
# Attach the G41P model's fitted values and a factor copy of year (used for
# the discrete colour/shape scales and as the t-test grouping variable).
Long_format_2005_2015_new_G41P$predicted_G41P <- predict(G41P_Regression_05_15)
Long_format_2005_2015_new_G41P$year_factor_G41P <-
  factor(Long_format_2005_2015_new_G41P$year)
# Observed G41P by age, with the fitted line for each year.
ggplot(
  Long_format_2005_2015_new_G41P,
  aes(x = Age_18_graduate, y = G41P, color = factor(year_factor_G41P))
) +
  geom_point(aes(shape = year_factor_G41P), alpha = 0.5) +
  geom_line(aes(y = predicted_G41P), size = 1) +
  labs(
    title = "Importance of job central to identity (G41P) by Age and Year",
    x = "Age",
    y = "Importance of job central to identity (G41P)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Pooled-variance two-sample t-test of G41P between the two survey years.
t_test_G41P <- t.test(
  G41P ~ year_factor_G41P,
  data = Long_format_2005_2015_new_G41P,
  var.equal = TRUE
)
t_test_G41P
## 
##  Two Sample t-test
## 
## data:  G41P by year_factor_G41P
## t = 0.86887, df = 599, p-value = 0.3853
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.1435382  0.3713174
## sample estimates:
## mean in group 2005 mean in group 2015 
##           5.011527           4.897638
# Mean G41P per survey year (missing values ignored).
mean_G41P_0515 <- Long_format_2005_2015_new_G41P %>%
  group_by(year) %>%
  summarise(average_G41P = mean(G41P, na.rm = TRUE)) %>%
  ungroup()
mean_G41P_0515
## # A tibble: 2 × 2
##    year average_G41P
##   <dbl>        <dbl>
## 1  2005         5.01
## 2  2015         4.90
# Standard deviation of G41P per survey year (missing values ignored).
sd_G41P_0515 <- Long_format_2005_2015_new_G41P %>%
  group_by(year) %>%
  summarise(sd_G41P = sd(G41P, na.rm = TRUE)) %>%
  ungroup()
sd_G41P_0515
## # A tibble: 2 × 2
##    year sd_G41P
##   <dbl>   <dbl>
## 1  2005    1.54
## 2  2015    1.65
# Box plot of G41P by year; the dashed crossbar marks each year's mean.
ggplot(
  Long_format_2005_2015_new_G41P,
  aes(x = year_factor_G41P, y = G41P, fill = year_factor_G41P)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of job central to identity (G41P) in 2005 and 2015",
    x = "Year",
    y = "Importance of job central to identity (G41P)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d (independent samples) for G41P, 2005 vs 2015, keeping only
# responses with G41P < 9.
data_2005_G41P <- Long_format_2005_2015_new %>%
  filter(year == 2005, G41P < 9)
data_2015_G41P <- Long_format_2005_2015_new %>%
  filter(year == 2015, G41P < 9)
# Namespace-qualified because rstatix also exports cohens_d().
effect_size_G41P_05_15 <- effectsize::cohens_d(
  data_2005_G41P$G41P,
  data_2015_G41P$G41P,
  paired = FALSE
)
effect_size_G41P_05_15
## Cohen's d |        95% CI
## -------------------------
## 0.07      | [-0.09, 0.23]
## 
## - Estimated using pooled SD.

H1: General Health

# Frequency of each H1 response within each survey year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(H1)
## # A tibble: 10 × 3
## # Groups:   year [2]
##     year    H1     n
##    <dbl> <dbl> <int>
##  1  2005     1    94
##  2  2005     2   145
##  3  2005     3    84
##  4  2005     4    23
##  5  2005     5     2
##  6  2015     1    68
##  7  2015     2   102
##  8  2015     3    59
##  9  2015     4    24
## 10  2015     5     1
# Linear model of H1 on age, the year code (year_new), and their
# interaction.
H1_Regression_05_15 <- lm(
  H1 ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
H1_Regression_05_15
## 
## Call:
## lm(formula = H1 ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Coefficients:
##              (Intercept)           Age_18_graduate                  year_new  
##                  2.33654                  -0.01083                  -1.78237  
## Age_18_graduate:year_new  
##                  0.09726
# Coefficient table and fit statistics for the H1 model.
H1_Regression_05_15 %>%
  summary()
## 
## Call:
## lm(formula = H1 ~ Age_18_graduate + year_new + Age_18_graduate:year_new, 
##     data = Long_format_2005_2015_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1963 -1.0651 -0.1732  0.8268  2.9349 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)               2.33654    1.44708   1.615    0.107
## Age_18_graduate          -0.01083    0.07786  -0.139    0.889
## year_new                 -1.78237    1.44708  -1.232    0.219
## Age_18_graduate:year_new  0.09726    0.07786   1.249    0.212
## 
## Residual standard error: 0.9204 on 598 degrees of freedom
## Multiple R-squared:  0.003433,   Adjusted R-squared:  -0.001567 
## F-statistic: 0.6866 on 3 and 598 DF,  p-value: 0.5604
# Attach the H1 model's fitted values and a factor copy of year (used for
# the discrete colour/shape scales and as the t-test grouping variable).
Long_format_2005_2015_new$predicted_H1 <- predict(H1_Regression_05_15)
Long_format_2005_2015_new$year_factor_H1 <-
  factor(Long_format_2005_2015_new$year)
# Observed H1 by age, with the fitted line for each year.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = H1, color = factor(year_factor_H1))
) +
  geom_point(aes(shape = year_factor_H1), alpha = 0.5) +
  geom_line(aes(y = predicted_H1), size = 1) +
  labs(
    title = "General Health (H1) by Age and Year",
    x = "Age",
    y = "General Health (H1)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Pooled-variance two-sample t-test of H1 between the two survey years.
t_test_H1 <- t.test(
  H1 ~ year_factor_H1,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_H1
## 
##  Two Sample t-test
## 
## data:  H1 by year_factor_H1
## t = -0.5882, df = 600, p-value = 0.5566
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
##  -0.1937954  0.1044660
## sample estimates:
## mean in group 2005 mean in group 2015 
##           2.120690           2.165354
# Mean H1 per survey year (missing values ignored).
mean_H1_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_H1 = mean(H1, na.rm = TRUE)) %>%
  ungroup()
mean_H1_0515
## # A tibble: 2 × 2
##    year average_H1
##   <dbl>      <dbl>
## 1  2005       2.12
## 2  2015       2.17
# Standard deviation of H1 per survey year (missing values ignored).
sd_H1_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_H1 = sd(H1, na.rm = TRUE)) %>%
  ungroup()
sd_H1_0515
## # A tibble: 2 × 2
##    year sd_H1
##   <dbl> <dbl>
## 1  2005 0.903
## 2  2015 0.943
# Box plot of H1 by year; the dashed crossbar marks each year's mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_H1, y = H1, fill = year_factor_H1)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of General Health (H1) in 2005 and 2015",
    x = "Year",
    y = "General Health (H1)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d (independent samples) for H1.
# NOTE(review): data_2005 / data_2015 are defined outside this section;
# presumably the 2005 and 2015 subsets -- confirm.
# Namespace-qualified because rstatix also exports cohens_d().
effect_size_H1_05_15 <- effectsize::cohens_d(
  data_2005$H1,
  data_2015$H1,
  paired = FALSE
)
effect_size_H1_05_15
## Cohen's d |        95% CI
## -------------------------
## -0.05     | [-0.21, 0.11]
## 
## - Estimated using pooled SD.