We will be going through the TAS survey data, comparing responses from the 2005 and 2015 waves.
library(tidyverse)
library(readxl)
library(ggplot2)
library (reshape2)
library(writexl)
library(lme4)
library(dplyr)
library(ggpubr)
library(rstatix)
library(effectsize)
library(effsize)
# Read the long-format TAS workbook (path is relative to the working
# directory), send it to the data viewer, and preview the first rows.
TAS_data_long_format_age <- read_excel(path = "TAS_data_long_format_age.xlsx")
view(TAS_data_long_format_age)
TAS_data_long_format_age %>% head()
## # A tibble: 6 × 42
## TAS TAS05 TAS09 TAS15 `1968 Interview Number` `Person Number` Gender
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 1 1 NA 4 180 2
## 2 2 1 1 NA 5 32 2
## 3 2 1 1 NA 6 34 1
## 4 2 1 1 NA 14 30 1
## 5 1 1 NA NA 18 38 2
## 6 2 1 1 NA 47 34 2
## # ℹ 35 more variables: `Individual is sample` <dbl>, `Year ID Number` <dbl>,
## # `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## # `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## # C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## # E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## # G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## # G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>, …
Filter the data (2005 & 2015)
# Keep the 2005 and 2015 rows, restrict to Age_18_graduate values of 18 or 19,
# and recode the survey year as a numeric predictor
# (2005 -> -1, 2009 -> 0, 2015 -> 1).
Long_format_2005_2015_new <- TAS_data_long_format_age %>%
  filter(year %in% c(2005, 2015)) %>%
  filter(Age_18_graduate %in% c(18, 19)) %>%
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  )
# Show the first rows of columns 1 to 43 as a formatted table.
Long_format_2005_2015_new[, 1:43] %>%
  head() %>%
  knitr::kable()
| TAS | TAS05 | TAS09 | TAS15 | 1968 Interview Number | Person Number | Gender | Individual is sample | Year ID Number | Sequence Number | Relationship to Head | Release Number | B5A | B5D | B6C | C2D | C2E | C2F | D2D3_month | D2D3_year | E1_1st_mention | E1_2nd_mention | E1_3rd_mention | E3 | G1 | G2_month | G2_year | G10 | G11 | G30A | G41A | G41B | G41C | G41H | G41P | H1 | L7_1st_mention | L7_2nd_mention | L7_3rd_mention | Age_17_graduate | Age_18_graduate | year | year_new |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | NA | NA | 18 | 38 | 2 | 2 | 5647 | 3 | 98 | 5 | 3 | 4 | 3 | 4 | 2 | 2 | 0 | 0 | 3 | 7 | 0 | 5 | 1 | 6 | 2004 | 1 | 5 | 6 | 5 | 5 | 5 | 7 | 5 | 2 | 1 | 0 | 0 | 18 | 19 | 2005 | -1 |
| 2 | 1 | 1 | NA | 47 | 34 | 2 | 2 | 2516 | 3 | 30 | 5 | 4 | 5 | 6 | 4 | 5 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2005 | 5 | 0 | 6 | 3 | 6 | 4 | 7 | 4 | 1 | 1 | 0 | 0 | 17 | 18 | 2005 | -1 |
| 2 | 1 | 1 | NA | 53 | 36 | 2 | 2 | 1616 | 3 | 30 | 5 | 4 | 5 | 7 | 4 | 1 | 1 | 0 | 0 | 6 | 0 | 0 | 1 | 1 | 6 | 2005 | 1 | 5 | 7 | 7 | 7 | 5 | 7 | 6 | 2 | 1 | 0 | 0 | 17 | 18 | 2005 | -1 |
| 2 | 1 | 1 | NA | 79 | 32 | 2 | 2 | 6520 | 2 | 30 | 5 | 3 | 4 | 6 | 7 | 5 | 3 | 0 | 0 | 1 | 7 | 0 | 0 | 1 | 5 | 2004 | 1 | 1 | 0 | 7 | 7 | 6 | 7 | 4 | 1 | 1 | 0 | 0 | 18 | 19 | 2005 | -1 |
| 2 | 1 | 1 | NA | 88 | 35 | 1 | 2 | 3411 | 2 | 30 | 5 | 2 | 5 | 7 | 3 | 1 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2005 | 1 | 1 | 7 | 2 | 6 | 5 | 6 | 7 | 2 | 1 | 0 | 0 | 17 | 18 | 2005 | -1 |
| 2 | 1 | 1 | NA | 89 | 34 | 2 | 2 | 4527 | 3 | 30 | 5 | 2 | 4 | 5 | 2 | 3 | 1 | 0 | 0 | 3 | 7 | 0 | 5 | 1 | 5 | 2005 | 1 | 1 | 7 | 5 | 6 | 4 | 7 | 5 | 1 | 1 | 0 | 0 | 17 | 18 | 2005 | -1 |
# Number of rows retained for each survey year.
count(Long_format_2005_2015_new, year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 348
## 2 2015 254
# Split the filtered sample into one data frame per survey year.
data_2005 <- filter(Long_format_2005_2015_new, year == 2005)
data_2015 <- filter(Long_format_2005_2015_new, year == 2015)
# Tabulate how often each B5A value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(B5A)
## # A tibble: 10 × 3
## # Groups: year [2]
## year B5A n
## <dbl> <dbl> <int>
## 1 2005 1 14
## 2 2005 2 73
## 3 2005 3 91
## 4 2005 4 111
## 5 2005 5 59
## 6 2015 1 9
## 7 2015 2 52
## 8 2015 3 60
## 9 2015 4 80
## 10 2015 5 53
# Fit B5A on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
B5A_Regression_05_15 <- lm(
  B5A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(B5A_Regression_05_15)
##
## Call:
## lm(formula = B5A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## -2.669620 0.327591 -0.117341
## Age_18_graduate:year_new
## 0.007337
# Coefficient table, residual summary, and fit statistics for the B5A model.
B5A_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = B5A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5767 -0.5767 0.4233 0.7877 1.7877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.669620 1.749622 -1.526 0.127582
## Age_18_graduate 0.327591 0.094139 3.480 0.000538 ***
## year_new -0.117341 1.749622 -0.067 0.946551
## Age_18_graduate:year_new 0.007337 0.094139 0.078 0.937899
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.113 on 598 degrees of freedom
## Multiple R-squared: 0.02196, Adjusted R-squared: 0.01705
## F-statistic: 4.476 on 3 and 598 DF, p-value: 0.00405
# Store the B5A model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_B5A <- predict(B5A_Regression_05_15)
Long_format_2005_2015_new$year_factor_B5A <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = B5A, color = year_factor_B5A)
) +
  geom_point(aes(shape = year_factor_B5A), alpha = 0.5) +
  geom_line(aes(y = predicted_B5A), linewidth = 1) +
  labs(
    title = "Responsibility for Self (B5A) by Age and Year",
    x = "Age",
    y = "Responsibility for Self (B5A)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Pooled-variance two-sample t-test comparing B5A between 2005 and 2015.
t_test_B5A <- t.test(
  B5A ~ year_factor_B5A,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
print(t_test_B5A)
##
## Two Sample t-test
##
## data: B5A by year_factor_B5A
## t = -0.95946, df = 600, p-value = 0.3377
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.2707990 0.0930454
## sample estimates:
## mean in group 2005 mean in group 2015
## 3.367816 3.456693
# Mean of B5A within each survey year, ignoring missing values.
mean_B5A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_B5A = mean(B5A, na.rm = TRUE)) %>%
  ungroup()
print(mean_B5A_0515)
## # A tibble: 2 × 2
## year average_B5A
## <dbl> <dbl>
## 1 2005 3.37
## 2 2015 3.46
# Standard deviation of B5A within each survey year, ignoring missing values.
sd_B5A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_B5A = sd(B5A, na.rm = TRUE)) %>%
  ungroup()
print(sd_B5A_0515)
## # A tibble: 2 × 2
## year sd_B5A
## <dbl> <dbl>
## 1 2005 1.11
## 2 2015 1.14
# Box plot of B5A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_B5A, y = B5A, fill = year_factor_B5A)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Responsibility for Self (B5A) in 2005 and 2015",
    x = "Year",
    y = "Responsibility for Self (B5A)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_B5A_05_15 <- effectsize::cohens_d(
  data_2005$B5A, data_2015$B5A,
  paired = FALSE
)
effect_size_B5A_05_15
## Cohen's d | 95% CI
## -------------------------
## -0.08 | [-0.24, 0.08]
##
## - Estimated using pooled SD.
# Tabulate how often each B5D value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(B5D)
## # A tibble: 10 × 3
## # Groups: year [2]
## year B5D n
## <dbl> <dbl> <int>
## 1 2005 1 7
## 2 2005 2 14
## 3 2005 3 39
## 4 2005 4 89
## 5 2005 5 199
## 6 2015 1 10
## 7 2015 2 10
## 8 2015 3 22
## 9 2015 4 61
## 10 2015 5 151
# Fit B5D on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
B5D_Regression_05_15 <- lm(
  B5D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(B5D_Regression_05_15)
##
## Call:
## lm(formula = B5D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 0.4266 0.2090 -1.9583
## Age_18_graduate:year_new
## 0.1044
# Coefficient table, residual summary, and fit statistics for the B5D model.
B5D_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = B5D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4233 -0.3728 0.5767 0.6272 0.8901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.42660 1.56617 0.272 0.7854
## Age_18_graduate 0.20902 0.08427 2.480 0.0134 *
## year_new -1.95832 1.56617 -1.250 0.2116
## Age_18_graduate:year_new 0.10440 0.08427 1.239 0.2159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9961 on 598 degrees of freedom
## Multiple R-squared: 0.01116, Adjusted R-squared: 0.006201
## F-statistic: 2.25 on 3 and 598 DF, p-value: 0.08146
# Store the B5D model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_B5D <- predict(B5D_Regression_05_15)
Long_format_2005_2015_new$year_factor_B5D <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_B5D` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = B5D, color = year_factor_B5D)
) +
  geom_point(aes(shape = year_factor_B5D), alpha = 0.5) +
  geom_line(aes(y = predicted_B5D), linewidth = 1) +
  labs(
    title = "Managing own money (B5D) by Age and Year",
    x = "Age",
    y = "Managing own money (B5D)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing B5D between 2005 and 2015.
t_test_B5D <- t.test(
  B5D ~ year_factor_B5D,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_B5D
##
## Two Sample t-test
##
## data: B5D by year_factor_B5D
## t = 0.096233, df = 600, p-value = 0.9234
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.1541366 0.1700204
## sample estimates:
## mean in group 2005 mean in group 2015
## 4.318966 4.311024
# Mean of B5D within each survey year, ignoring missing values.
mean_B5D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_B5D = mean(B5D, na.rm = TRUE)) %>%
  ungroup()
print(mean_B5D_0515)
## # A tibble: 2 × 2
## year average_B5D
## <dbl> <dbl>
## 1 2005 4.32
## 2 2015 4.31
# Standard deviation of B5D within each survey year, ignoring missing values.
sd_B5D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_B5D = sd(B5D, na.rm = TRUE)) %>%
  ungroup()
print(sd_B5D_0515)
## # A tibble: 2 × 2
## year sd_B5D
## <dbl> <dbl>
## 1 2005 0.963
## 2 2015 1.05
# Box plot of B5D by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_B5D, y = B5D, fill = year_factor_B5D)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Managing Own Money (B5D) in 2005 and 2015",
    x = "Year",
    y = "Managing Own Money (B5D)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_B5D_05_15 <- effectsize::cohens_d(
  data_2005$B5D, data_2015$B5D,
  paired = FALSE
)
effect_size_B5D_05_15
## Cohen's d | 95% CI
## -------------------------
## 7.94e-03 | [-0.15, 0.17]
##
## - Estimated using pooled SD.
# Tabulate how often each B6C value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(B6C)
## # A tibble: 14 × 3
## # Groups: year [2]
## year B6C n
## <dbl> <dbl> <int>
## 1 2005 1 4
## 2 2005 2 11
## 3 2005 3 23
## 4 2005 4 41
## 5 2005 5 98
## 6 2005 6 69
## 7 2005 7 102
## 8 2015 1 3
## 9 2015 2 8
## 10 2015 3 11
## 11 2015 4 36
## 12 2015 5 70
## 13 2015 6 65
## 14 2015 7 61
# Fit B6C on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
B6C_Regression_05_15 <- lm(
  B6C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(B6C_Regression_05_15)
##
## Call:
## lm(formula = B6C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 6.50185 -0.06079 -1.55673
## Age_18_graduate:year_new
## 0.08337
# Coefficient table, residual summary, and fit statistics for the B6C model.
B6C_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = B6C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4637 -0.4637 -0.3195 1.5363 1.6805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.50185 2.22003 2.929 0.00353 **
## Age_18_graduate -0.06079 0.11945 -0.509 0.61101
## year_new -1.55673 2.22003 -0.701 0.48344
## Age_18_graduate:year_new 0.08337 0.11945 0.698 0.48546
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.412 on 598 degrees of freedom
## Multiple R-squared: 0.001631, Adjusted R-squared: -0.003377
## F-statistic: 0.3257 on 3 and 598 DF, p-value: 0.8068
# Store the B6C model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_B6C <- predict(B6C_Regression_05_15)
Long_format_2005_2015_new$year_factor_B6C <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_B6C` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = B6C, color = year_factor_B6C)
) +
  geom_point(aes(shape = year_factor_B6C), alpha = 0.5) +
  geom_line(aes(y = predicted_B6C), linewidth = 1) +
  labs(
    title = "Money management skills (B6C) by Age and Year",
    x = "Age",
    y = "Money Management skills (B6C)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing B6C between 2005 and 2015.
t_test_B6C <- t.test(
  B6C ~ year_factor_B6C,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_B6C
##
## Two Sample t-test
##
## data: B6C by year_factor_B6C
## t = 0.23653, df = 600, p-value = 0.8131
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.2011003 0.2561732
## sample estimates:
## mean in group 2005 mean in group 2015
## 5.393678 5.366142
# Mean of B6C within each survey year, ignoring missing values.
mean_B6C_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_B6C = mean(B6C, na.rm = TRUE)) %>%
  ungroup()
print(mean_B6C_0515)
## # A tibble: 2 × 2
## year average_B6C
## <dbl> <dbl>
## 1 2005 5.39
## 2 2015 5.37
# Standard deviation of B6C within each survey year, ignoring missing values.
sd_B6C_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_B6C = sd(B6C, na.rm = TRUE)) %>%
  ungroup()
print(sd_B6C_0515)
## # A tibble: 2 × 2
## year sd_B6C
## <dbl> <dbl>
## 1 2005 1.44
## 2 2015 1.37
# Box plot of B6C by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_B6C, y = B6C, fill = year_factor_B6C)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Money management skills (B6C) in 2005 and 2015",
    x = "Year",
    y = "Money management skills (B6C)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_B6C_05_15 <- effectsize::cohens_d(
  data_2005$B6C, data_2015$B6C,
  paired = FALSE
)
effect_size_B6C_05_15
## Cohen's d | 95% CI
## -------------------------
## 0.02 | [-0.14, 0.18]
##
## - Estimated using pooled SD.
# Tabulate how often each C2D value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(C2D)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2D n
## <dbl> <dbl> <int>
## 1 2005 1 54
## 2 2005 2 52
## 3 2005 3 57
## 4 2005 4 66
## 5 2005 5 49
## 6 2005 6 34
## 7 2005 7 36
## 8 2015 1 48
## 9 2015 2 39
## 10 2015 3 48
## 11 2015 4 43
## 12 2015 5 37
## 13 2015 6 17
## 14 2015 7 22
# Fit C2D on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
C2D_Regression_05_15 <- lm(
  C2D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(C2D_Regression_05_15)
##
## Call:
## lm(formula = C2D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 0.97875 0.14098 -0.48721
## Age_18_graduate:year_new
## 0.01913
# Coefficient table, residual summary, and fit statistics for the C2D model.
C2D_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = C2D ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7811 -1.6592 0.2189 1.3408 3.6264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.97875 2.94572 0.332 0.740
## Age_18_graduate 0.14098 0.15850 0.890 0.374
## year_new -0.48721 2.94572 -0.165 0.869
## Age_18_graduate:year_new 0.01913 0.15850 0.121 0.904
##
## Residual standard error: 1.873 on 598 degrees of freedom
## Multiple R-squared: 0.005396, Adjusted R-squared: 0.0004064
## F-statistic: 1.081 on 3 and 598 DF, p-value: 0.3563
# Store the C2D model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_C2D <- predict(C2D_Regression_05_15)
Long_format_2005_2015_new$year_factor_C2D <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_C2D` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = C2D, color = year_factor_C2D)
) +
  geom_point(aes(shape = year_factor_C2D), alpha = 0.5) +
  geom_line(aes(y = predicted_C2D), linewidth = 1) +
  labs(
    title = "Worry about expenses (C2D) by Age and Year",
    x = "Age",
    y = "Worry about expenses (C2D)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing C2D between 2005 and 2015.
t_test_C2D <- t.test(
  C2D ~ year_factor_C2D,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_C2D
##
## Two Sample t-test
##
## data: C2D by year_factor_C2D
## t = 1.5669, df = 600, p-value = 0.1177
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.06132932 0.54535503
## sample estimates:
## mean in group 2005 mean in group 2015
## 3.718391 3.476378
# Mean of C2D within each survey year, ignoring missing values.
mean_C2D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_C2D = mean(C2D, na.rm = TRUE)) %>%
  ungroup()
print(mean_C2D_0515)
## # A tibble: 2 × 2
## year average_C2D
## <dbl> <dbl>
## 1 2005 3.72
## 2 2015 3.48
# Standard deviation of C2D within each survey year, ignoring missing values.
sd_C2D_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_C2D = sd(C2D, na.rm = TRUE)) %>%
  ungroup()
print(sd_C2D_0515)
## # A tibble: 2 × 2
## year sd_C2D
## <dbl> <dbl>
## 1 2005 1.88
## 2 2015 1.85
# Box plot of C2D by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_C2D, y = C2D, fill = year_factor_C2D)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Worry about Expenses (C2D) in 2005 and 2015",
    x = "Year",
    y = "Worry about Expenses (C2D)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_C2D_05_15 <- effectsize::cohens_d(
  data_2005$C2D, data_2015$C2D,
  paired = FALSE
)
effect_size_C2D_05_15
## Cohen's d | 95% CI
## -------------------------
## 0.13 | [-0.03, 0.29]
##
## - Estimated using pooled SD.
# Tabulate how often each C2E value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(C2E)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2E n
## <dbl> <dbl> <int>
## 1 2005 1 64
## 2 2005 2 65
## 3 2005 3 57
## 4 2005 4 41
## 5 2005 5 45
## 6 2005 6 45
## 7 2005 7 31
## 8 2015 1 46
## 9 2015 2 42
## 10 2015 3 39
## 11 2015 4 45
## 12 2015 5 29
## 13 2015 6 21
## 14 2015 7 32
# Fit C2E on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
C2E_Regression_05_15 <- lm(
  C2E ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(C2E_Regression_05_15)
##
## Call:
## lm(formula = C2E ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## -1.59475 0.27944 -1.26280
## Age_18_graduate:year_new
## 0.06857
# Coefficient table, residual summary, and fit statistics for the C2E model.
C2E_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = C2E ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7546 -1.6746 -0.4066 1.5363 3.5934
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.59475 3.08875 -0.516 0.6058
## Age_18_graduate 0.27944 0.16619 1.681 0.0932 .
## year_new -1.26280 3.08875 -0.409 0.6828
## Age_18_graduate:year_new 0.06857 0.16619 0.413 0.6801
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.964 on 598 degrees of freedom
## Multiple R-squared: 0.004974, Adjusted R-squared: -1.781e-05
## F-statistic: 0.9964 on 3 and 598 DF, p-value: 0.394
# Store the C2E model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_C2E <- predict(C2E_Regression_05_15)
Long_format_2005_2015_new$year_factor_C2E <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_C2E` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = C2E, color = year_factor_C2E)
) +
  geom_point(aes(shape = year_factor_C2E), alpha = 0.5) +
  geom_line(aes(y = predicted_C2E), linewidth = 1) +
  labs(
    title = "Worry about future job (C2E) by Age and Year",
    x = "Age",
    y = "Worry about future job (C2E)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing C2E between 2005 and 2015.
t_test_C2E <- t.test(
  C2E ~ year_factor_C2E,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_C2E
##
## Two Sample t-test
##
## data: C2E by year_factor_C2E
## t = -0.39344, df = 600, p-value = 0.6941
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.3824418 0.2547832
## sample estimates:
## mean in group 2005 mean in group 2015
## 3.566092 3.629921
# Mean of C2E within each survey year, ignoring missing values.
mean_C2E_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_C2E = mean(C2E, na.rm = TRUE)) %>%
  ungroup()
print(mean_C2E_0515)
## # A tibble: 2 × 2
## year average_C2E
## <dbl> <dbl>
## 1 2005 3.57
## 2 2015 3.63
# Standard deviation of C2E within each survey year, ignoring missing values.
sd_C2E_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_C2E = sd(C2E, na.rm = TRUE)) %>%
  ungroup()
print(sd_C2E_0515)
## # A tibble: 2 × 2
## year sd_C2E
## <dbl> <dbl>
## 1 2005 1.96
## 2 2015 1.98
# Box plot of C2E by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_C2E, y = C2E, fill = year_factor_C2E)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Worry about Future Job (C2E) in 2005 and 2015",
    x = "Year",
    y = "Worry about Future Job (C2E)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_C2E_05_15 <- effectsize::cohens_d(
  data_2005$C2E, data_2015$C2E,
  paired = FALSE
)
effect_size_C2E_05_15
## Cohen's d | 95% CI
## -------------------------
## -0.03 | [-0.19, 0.13]
##
## - Estimated using pooled SD.
# Tabulate how often each C2F value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(C2F)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2F n
## <dbl> <dbl> <int>
## 1 2005 1 82
## 2 2005 2 84
## 3 2005 3 56
## 4 2005 4 52
## 5 2005 5 38
## 6 2005 6 18
## 7 2005 7 18
## 8 2015 1 58
## 9 2015 2 57
## 10 2015 3 47
## 11 2015 4 36
## 12 2015 5 28
## 13 2015 6 17
## 14 2015 7 11
# Fit C2F on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
C2F_Regression_05_15 <- lm(
  C2F ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(C2F_Regression_05_15)
##
## Call:
## lm(formula = C2F ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 1.42792 0.08599 -2.84692
## Age_18_graduate:year_new
## 0.15402
# Coefficient table, residual summary, and fit statistics for the C2F model.
C2F_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = C2F ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1411 -1.1411 -0.1411 1.0178 4.0989
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.42792 2.74853 0.520 0.604
## Age_18_graduate 0.08599 0.14789 0.581 0.561
## year_new -2.84692 2.74853 -1.036 0.301
## Age_18_graduate:year_new 0.15402 0.14789 1.041 0.298
##
## Residual standard error: 1.748 on 598 degrees of freedom
## Multiple R-squared: 0.002172, Adjusted R-squared: -0.002834
## F-statistic: 0.4338 on 3 and 598 DF, p-value: 0.7289
# Store the C2F model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_C2F <- predict(C2F_Regression_05_15)
Long_format_2005_2015_new$year_factor_C2F <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_C2F` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = C2F, color = year_factor_C2F)
) +
  geom_point(aes(shape = year_factor_C2F), alpha = 0.5) +
  geom_line(aes(y = predicted_C2F), linewidth = 1) +
  labs(
    title = "Discouraged about future (C2F) by Age and Year",
    x = "Age",
    y = "Discouraged about future (C2F)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing C2F between 2005 and 2015.
t_test_C2F <- t.test(
  C2F ~ year_factor_C2F,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_C2F
##
## Two Sample t-test
##
## data: C2F by year_factor_C2F
## t = -0.26272, df = 600, p-value = 0.7929
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.3210160 0.2452626
## sample estimates:
## mean in group 2005 mean in group 2015
## 3.017241 3.055118
# Mean of C2F within each survey year, ignoring missing values.
mean_C2F_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_C2F = mean(C2F, na.rm = TRUE)) %>%
  ungroup()
print(mean_C2F_0515)
## # A tibble: 2 × 2
## year average_C2F
## <dbl> <dbl>
## 1 2005 3.02
## 2 2015 3.06
# Standard deviation of C2F within each survey year, ignoring missing values.
sd_C2F_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_C2F = sd(C2F, na.rm = TRUE)) %>%
  ungroup()
print(sd_C2F_0515)
## # A tibble: 2 × 2
## year sd_C2F
## <dbl> <dbl>
## 1 2005 1.75
## 2 2015 1.74
# Box plot of C2F by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_C2F, y = C2F, fill = year_factor_C2F)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Discouraged about future (C2F) in 2005 and 2015",
    x = "Year",
    y = "Discouraged about future (C2F)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_C2F_05_15 <- effectsize::cohens_d(
  data_2005$C2F, data_2015$C2F,
  paired = FALSE
)
effect_size_C2F_05_15
## Cohen's d | 95% CI
## -------------------------
## -0.02 | [-0.18, 0.14]
##
## - Estimated using pooled SD.
# Tabulate how often each G30A value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G30A)
## # A tibble: 12 × 3
## # Groups: year [2]
## year G30A n
## <dbl> <dbl> <int>
## 1 2005 0 35
## 2 2005 1 1
## 3 2005 3 2
## 4 2005 4 15
## 5 2005 5 61
## 6 2005 6 114
## 7 2005 7 120
## 8 2015 3 2
## 9 2015 4 13
## 10 2015 5 48
## 11 2015 6 88
## 12 2015 7 103
# Keep only rows whose G30A value is strictly positive (this drops the 0
# code), then recount the rows available per year.
Long_format_2005_2015_new_G30A <- filter(Long_format_2005_2015_new, G30A > 0)
count(Long_format_2005_2015_new_G30A, year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 313
## 2 2015 254
# Fit G30A on age, the recoded year (year_new), and their interaction using
# the G30A-filtered sample, then print the estimated coefficients.
G30A_Regression_05_15 <- lm(
  G30A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new_G30A
)
print(G30A_Regression_05_15)
##
## Call:
## lm(formula = G30A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new_G30A)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 5.00387 0.05770 0.37158
## Age_18_graduate:year_new
## -0.01934
# Coefficient table, residual summary, and fit statistics for the G30A model.
G30A_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = G30A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new_G30A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0191 -0.5617 -0.0659 0.9038 0.9809
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.00387 1.51653 3.300 0.00103 **
## Age_18_graduate 0.05770 0.08159 0.707 0.47972
## year_new 0.37158 1.51653 0.245 0.80653
## Age_18_graduate:year_new -0.01934 0.08159 -0.237 0.81269
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9436 on 563 degrees of freedom
## Multiple R-squared: 0.001401, Adjusted R-squared: -0.00392
## F-statistic: 0.2634 on 3 and 563 DF, p-value: 0.8518
# Store the G30A model's fitted values and a factor copy of the survey year
# on the G30A-filtered sample.
Long_format_2005_2015_new_G30A$predicted_G30A <- predict(G30A_Regression_05_15)
Long_format_2005_2015_new_G30A$year_factor_G30A <-
  factor(Long_format_2005_2015_new_G30A$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_G30A` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new_G30A,
  aes(x = Age_18_graduate, y = G30A, color = year_factor_G30A)
) +
  geom_point(aes(shape = year_factor_G30A), alpha = 0.5) +
  geom_line(aes(y = predicted_G30A), linewidth = 1) +
  labs(
    title = "Likelihood of well-paying job (G30A) by Age and Year",
    x = "Age",
    y = "Likelihood of well-paying job (G30A)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing G30A between 2005 and 2015.
t_test_G30A <- t.test(
  G30A ~ year_factor_G30A,
  data = Long_format_2005_2015_new_G30A,
  var.equal = TRUE
)
t_test_G30A
##
## Two Sample t-test
##
## data: G30A by year_factor_G30A
## t = -0.41515, df = 565, p-value = 0.6782
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.1893790 0.1232927
## sample estimates:
## mean in group 2005 mean in group 2015
## 6.057508 6.090551
# Mean of G30A within each survey year (G30A-filtered sample), ignoring
# missing values.
mean_G30A_0515 <- Long_format_2005_2015_new_G30A %>%
  group_by(year) %>%
  summarise(average_G30A = mean(G30A, na.rm = TRUE)) %>%
  ungroup()
print(mean_G30A_0515)
## # A tibble: 2 × 2
## year average_G30A
## <dbl> <dbl>
## 1 2005 6.06
## 2 2015 6.09
# Standard deviation of G30A within each survey year (G30A-filtered sample),
# ignoring missing values.
sd_G30A_0515 <- Long_format_2005_2015_new_G30A %>%
  group_by(year) %>%
  summarise(sd_G30A = sd(G30A, na.rm = TRUE)) %>%
  ungroup()
print(sd_G30A_0515)
## # A tibble: 2 × 2
## year sd_G30A
## <dbl> <dbl>
## 1 2005 0.952
## 2 2015 0.930
# Box plot of G30A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new_G30A,
  aes(x = year_factor_G30A, y = G30A, fill = year_factor_G30A)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Likelihood of well-paying job (G30A) in 2005 and 2015",
    x = "Year",
    y = "Likelihood of well-paying job (G30A)",
    fill = "Year"
  ) +
  theme_minimal()
# Per-year subsets restricted to strictly positive G30A values.
data_2005_G30A <- Long_format_2005_2015_new %>%
  filter(year == 2005, G30A > 0)
data_2015_G30A <- Long_format_2005_2015_new %>%
  filter(year == 2015, G30A > 0)
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_G30A_05_15 <- effectsize::cohens_d(
  data_2005_G30A$G30A, data_2015_G30A$G30A,
  paired = FALSE
)
effect_size_G30A_05_15
## Cohen's d | 95% CI
## -------------------------
## -0.04 | [-0.20, 0.13]
##
## - Estimated using pooled SD.
# Tabulate how often each G41A value occurs within each year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41A)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41A n
## <dbl> <dbl> <int>
## 1 2005 1 12
## 2 2005 2 15
## 3 2005 3 26
## 4 2005 4 29
## 5 2005 5 81
## 6 2005 6 77
## 7 2005 7 108
## 8 2015 1 15
## 9 2015 2 15
## 10 2015 3 22
## 11 2015 4 39
## 12 2015 5 58
## 13 2015 6 54
## 14 2015 7 51
# Fit G41A on age, the recoded year (year_new), and their interaction, then
# print the estimated coefficients.
G41A_Regression_05_15 <- lm(
  G41A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
print(G41A_Regression_05_15)
##
## Call:
## lm(formula = G41A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 8.0868 -0.1593 4.9160
## Age_18_graduate:year_new
## -0.2768
# Coefficient table, residual summary, and fit statistics for the G41A model.
G41A_Regression_05_15 %>% summary()
##
## Call:
## lm(formula = G41A ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4024 -0.7178 0.2822 1.5976 2.2822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.0868 2.6291 3.076 0.00219 **
## Age_18_graduate -0.1593 0.1415 -1.126 0.26056
## year_new 4.9160 2.6291 1.870 0.06199 .
## Age_18_graduate:year_new -0.2768 0.1415 -1.956 0.05088 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.672 on 598 degrees of freedom
## Multiple R-squared: 0.0259, Adjusted R-squared: 0.02101
## F-statistic: 5.3 on 3 and 598 DF, p-value: 0.001303
# Store the G41A model's fitted values and a factor copy of the survey year.
Long_format_2005_2015_new$predicted_G41A <- predict(G41A_Regression_05_15)
Long_format_2005_2015_new$year_factor_G41A <-
  factor(Long_format_2005_2015_new$year)
# Observed responses (points) with the fitted regression line for each year.
# `year_factor_G41A` is already a factor, so it is mapped directly, and
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = G41A, color = year_factor_G41A)
) +
  geom_point(aes(shape = year_factor_G41A), alpha = 0.5) +
  geom_line(aes(y = predicted_G41A), linewidth = 1) +
  labs(
    title = "Importance of job status (G41A) by Age and Year",
    x = "Age",
    y = "Importance of job status (G41A)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# Pooled-variance two-sample t-test comparing G41A between 2005 and 2015.
t_test_G41A <- t.test(
  G41A ~ year_factor_G41A,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_G41A
##
## Two Sample t-test
##
## data: G41A by year_factor_G41A
## t = 3.3843, df = 600, p-value = 0.0007602
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## 0.1963895 0.7394870
## sample estimates:
## mean in group 2005 mean in group 2015
## 5.341954 4.874016
# Mean of G41A within each survey year, ignoring missing values.
mean_G41A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(average_G41A = mean(G41A, na.rm = TRUE)) %>%
  ungroup()
print(mean_G41A_0515)
## # A tibble: 2 × 2
## year average_G41A
## <dbl> <dbl>
## 1 2005 5.34
## 2 2015 4.87
# Standard deviation of G41A within each survey year, ignoring missing values.
sd_G41A_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarise(sd_G41A = sd(G41A, na.rm = TRUE)) %>%
  ungroup()
print(sd_G41A_0515)
## # A tibble: 2 × 2
## year sd_G41A
## <dbl> <dbl>
## 1 2005 1.64
## 2 2015 1.73
# Box plot of G41A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces the `size` argument, which ggplot2 3.4.0 deprecated
# for lines.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_G41A, y = G41A, fill = year_factor_G41A)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of job status (G41A) in 2005 and 2015",
    x = "Year",
    y = "Importance of job status (G41A)",
    fill = "Year"
  ) +
  theme_minimal()
# Cohen's d for 2005 vs 2015 (independent samples). The call is
# namespace-qualified because rstatix and effectsize are both attached and
# both export `cohens_d()`.
effect_size_G41A_05_15 <- effectsize::cohens_d(
  data_2005$G41A, data_2015$G41A,
  paired = FALSE
)
effect_size_G41A_05_15
## Cohen's d | 95% CI
## ------------------------
## 0.28 | [0.12, 0.44]
##
## - Estimated using pooled SD.
# G41B (importance of decision-making): response counts per year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41B)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41B n
## <dbl> <dbl> <int>
## 1 2005 1 2
## 2 2005 2 1
## 3 2005 3 12
## 4 2005 4 21
## 5 2005 5 97
## 6 2005 6 124
## 7 2005 7 91
## 8 2015 1 4
## 9 2015 2 5
## 10 2015 3 7
## 11 2015 4 21
## 12 2015 5 82
## 13 2015 6 61
## 14 2015 7 74

# Linear model of G41B on age, the recoded year, and their interaction.
G41B_Regression_05_15 <- lm(
  formula = G41B ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
G41B_Regression_05_15
##
## Call:
## lm(formula = G41B ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 5.753809 -0.005847 1.011916
## Age_18_graduate:year_new
## -0.058672

# Coefficient table and overall fit statistics.
summary(G41B_Regression_05_15)
##
## Call:
## lm(formula = G41B ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7456 -0.6927 0.2544 1.2544 1.4601
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.753809 1.877209 3.065 0.00227 **
## Age_18_graduate -0.005847 0.101004 -0.058 0.95386
## year_new 1.011916 1.877209 0.539 0.59005
## Age_18_graduate:year_new -0.058672 0.101004 -0.581 0.56154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.194 on 598 degrees of freedom
## Multiple R-squared: 0.004707, Adjusted R-squared: -0.0002859
## F-statistic: 0.9427 on 3 and 598 DF, p-value: 0.4196
# Fitted values for the G41B scatter plot. These must come from the G41B model:
# using the G41A model here would overlay the job-status (G41A) fit on the
# decision-making (G41B) observations.
Long_format_2005_2015_new$predicted_G41B <- predict(G41B_Regression_05_15)
# Year as a factor so it can drive the discrete colour/shape scales.
Long_format_2005_2015_new$year_factor_G41B <- factor(
  Long_format_2005_2015_new$year
)

# Observed G41B responses by age, with the G41B fitted line for each year.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = G41B, color = factor(year_factor_G41B))
) +
  geom_point(aes(shape = year_factor_G41B), alpha = 0.5) +
  geom_line(aes(y = predicted_G41B), size = 1) +
  labs(
    title = "Importance of decision-making (G41B) by Age and Year",
    x = "Age",
    y = "Importance of decision-making (G41B)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()
# G41B: two-sample t-test between years, assuming equal variances.
t_test_G41B <- t.test(
  G41B ~ year_factor_G41B,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_G41B
##
## Two Sample t-test
##
## data: G41B by year_factor_G41B
## t = 1.5794, df = 600, p-value = 0.1148
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.03783767 0.34863502
## sample estimates:
## mean in group 2005 mean in group 2015
## 5.718391 5.562992

# Per-year mean of G41B.
mean_G41B_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_G41B = mean(G41B, na.rm = TRUE)) %>%
  ungroup()
mean_G41B_0515
## # A tibble: 2 × 2
## year average_G41B
## <dbl> <dbl>
## 1 2005 5.72
## 2 2015 5.56

# Per-year standard deviation of G41B.
sd_G41B_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_G41B = sd(G41B, na.rm = TRUE)) %>%
  ungroup()
sd_G41B_0515
## # A tibble: 2 × 2
## year sd_G41B
## <dbl> <dbl>
## 1 2005 1.10
## 2 2015 1.31

# Box plot per year; the dashed crossbar marks the group mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_G41B, y = G41B, fill = year_factor_G41B)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of decision-making (G41B) in 2005 and 2015",
    x = "Year",
    y = "Importance of decision-making (G41B)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d between the 2005 and 2015 samples. Namespace-qualified because
# rstatix also exports a cohens_d().
effect_size_G41B_05_15 <- effectsize::cohens_d(
  data_2005$G41B,
  data_2015$G41B,
  paired = FALSE
)
effect_size_G41B_05_15
## Cohen's d | 95% CI
## -------------------------
## 0.13 | [-0.03, 0.29]
##
## - Estimated using pooled SD.
# G41C (importance of challenging work): response counts per year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41C)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41C n
## <dbl> <dbl> <int>
## 1 2005 2 4
## 2 2005 3 8
## 3 2005 4 50
## 4 2005 5 102
## 5 2005 6 109
## 6 2005 7 75
## 7 2015 0 1
## 8 2015 1 2
## 9 2015 2 3
## 10 2015 3 14
## 11 2015 4 32
## 12 2015 5 76
## 13 2015 6 66
## 14 2015 7 60

# Keep only responses above 0; the table above shows a single 2015 row coded 0.
# NOTE(review): 0 is presumably a non-substantive code - confirm in the codebook.
Long_format_2005_2015_new_G41C <- Long_format_2005_2015_new %>%
  filter(G41C > 0)

# Remaining sample size per year.
Long_format_2005_2015_new_G41C %>%
  count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 348
## 2 2015 253

# Linear model of G41C on age, the recoded year, and their interaction,
# fitted on the filtered data.
G41C_Regression_05_15 <- lm(
  formula = G41C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new_G41C
)
G41C_Regression_05_15
##
## Call:
## lm(formula = G41C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new_G41C)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 2.82459 0.14303 0.94098
## Age_18_graduate:year_new
## -0.05369

# Coefficient table and overall fit statistics.
summary(G41C_Regression_05_15)
##
## Call:
## lm(formula = G41C ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new_G41C)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4630 -0.6213 0.3787 0.6264 1.6264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.82459 1.86073 1.518 0.130
## Age_18_graduate 0.14303 0.10012 1.429 0.154
## year_new 0.94098 1.86073 0.506 0.613
## Age_18_graduate:year_new -0.05369 0.10012 -0.536 0.592
##
## Residual standard error: 1.183 on 597 degrees of freedom
## Multiple R-squared: 0.005948, Adjusted R-squared: 0.000953
## F-statistic: 1.191 on 3 and 597 DF, p-value: 0.3125
# G41C: attach the model's fitted values and a factor version of the year to
# the filtered data frame.
Long_format_2005_2015_new_G41C$predicted_G41C <- predict(G41C_Regression_05_15)
Long_format_2005_2015_new_G41C$year_factor_G41C <- factor(
  Long_format_2005_2015_new_G41C$year
)

# Observed responses by age, with the fitted line for each year drawn on top.
ggplot(
  Long_format_2005_2015_new_G41C,
  aes(x = Age_18_graduate, y = G41C, color = factor(year_factor_G41C))
) +
  geom_point(aes(shape = year_factor_G41C), alpha = 0.5) +
  geom_line(aes(y = predicted_G41C), size = 1) +
  labs(
    title = "Importance of challenging work (G41C) by Age and Year",
    x = "Age",
    y = "Importance of challenging work (G41C)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test between years, assuming equal variances.
t_test_G41C <- t.test(
  G41C ~ year_factor_G41C,
  data = Long_format_2005_2015_new_G41C,
  var.equal = TRUE
)
t_test_G41C
##
## Two Sample t-test
##
## data: G41C by year_factor_G41C
## t = 0.91317, df = 599, p-value = 0.3615
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.1027390 0.2813088
## sample estimates:
## mean in group 2005 mean in group 2015
## 5.520115 5.430830

# Per-year mean of G41C.
mean_G41C_0515 <- Long_format_2005_2015_new_G41C %>%
  group_by(year) %>%
  summarize(average_G41C = mean(G41C, na.rm = TRUE)) %>%
  ungroup()
mean_G41C_0515
## # A tibble: 2 × 2
## year average_G41C
## <dbl> <dbl>
## 1 2005 5.52
## 2 2015 5.43

# Per-year standard deviation of G41C.
sd_G41C_0515 <- Long_format_2005_2015_new_G41C %>%
  group_by(year) %>%
  summarize(sd_G41C = sd(G41C, na.rm = TRUE)) %>%
  ungroup()
sd_G41C_0515
## # A tibble: 2 × 2
## year sd_G41C
## <dbl> <dbl>
## 1 2005 1.12
## 2 2015 1.27

# Box plot per year; the dashed crossbar marks the group mean.
ggplot(
  Long_format_2005_2015_new_G41C,
  aes(x = year_factor_G41C, y = G41C, fill = year_factor_G41C)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of challenging work (G41C) in 2005 and 2015",
    x = "Year",
    y = "Importance of challenging work (G41C)",
    fill = "Year"
  ) +
  theme_minimal()

# Per-year subsets with the same G41C > 0 restriction, used for the effect size.
data_2005_G41C <- Long_format_2005_2015_new %>%
  filter(year == 2005, G41C > 0)
data_2015_G41C <- Long_format_2005_2015_new %>%
  filter(year == 2015, G41C > 0)

# Cohen's d between the 2005 and 2015 samples. Namespace-qualified because
# rstatix also exports a cohens_d().
effect_size_G41C_05_15 <- effectsize::cohens_d(
  data_2005_G41C$G41C,
  data_2015_G41C$G41C,
  paired = FALSE
)
effect_size_G41C_05_15
## Cohen's d | 95% CI
## -------------------------
## 0.08 | [-0.09, 0.24]
##
## - Estimated using pooled SD.
# G41H (importance of healthcare benefits): response counts per year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41H)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41H n
## <dbl> <dbl> <int>
## 1 2005 1 1
## 2 2005 2 2
## 3 2005 3 4
## 4 2005 4 6
## 5 2005 5 37
## 6 2005 6 90
## 7 2005 7 208
## 8 2015 1 1
## 9 2015 2 1
## 10 2015 3 3
## 11 2015 4 10
## 12 2015 5 40
## 13 2015 6 64
## 14 2015 7 135

# Linear model of G41H on age, the recoded year, and their interaction.
G41H_Regression_05_15 <- lm(
  formula = G41H ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
G41H_Regression_05_15
##
## Call:
## lm(formula = G41H ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 5.07656 0.06684 2.92870
## Age_18_graduate:year_new
## -0.16237

# Coefficient table and overall fit statistics.
summary(G41H_Regression_05_15)
##
## Call:
## lm(formula = G41H ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.2737 -0.2857 0.4970 0.7263 0.8098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.07656 1.54247 3.291 0.00106 **
## Age_18_graduate 0.06684 0.08299 0.805 0.42091
## year_new 2.92870 1.54247 1.899 0.05808 .
## Age_18_graduate:year_new -0.16237 0.08299 -1.956 0.05087 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.981 on 598 degrees of freedom
## Multiple R-squared: 0.01521, Adjusted R-squared: 0.01027
## F-statistic: 3.079 on 3 and 598 DF, p-value: 0.02707
# G41H: attach the model's fitted values and a factor version of the year.
Long_format_2005_2015_new$predicted_G41H <- predict(G41H_Regression_05_15)
Long_format_2005_2015_new$year_factor_G41H <- factor(
  Long_format_2005_2015_new$year
)

# Observed responses by age, with the fitted line for each year drawn on top.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = G41H, color = factor(year_factor_G41H))
) +
  geom_point(aes(shape = year_factor_G41H), alpha = 0.5) +
  geom_line(aes(y = predicted_G41H), size = 1) +
  labs(
    title = "Importance of healthcare benefits (G41H) by Age and Year",
    x = "Age",
    y = "Importance of healthcare benefits (G41H)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test between years, assuming equal variances.
t_test_G41H <- t.test(
  G41H ~ year_factor_G41H,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_G41H
##
## Two Sample t-test
##
## data: G41H by year_factor_G41H
## t = 1.9789, df = 600, p-value = 0.04829
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## 0.001212977 0.320083068
## sample estimates:
## mean in group 2005 mean in group 2015
## 6.385057 6.224409

# Per-year mean of G41H.
mean_G41H_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_G41H = mean(G41H, na.rm = TRUE)) %>%
  ungroup()
mean_G41H_0515
## # A tibble: 2 × 2
## year average_G41H
## <dbl> <dbl>
## 1 2005 6.39
## 2 2015 6.22

# Per-year standard deviation of G41H.
sd_G41H_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_G41H = sd(G41H, na.rm = TRUE)) %>%
  ungroup()
sd_G41H_0515
## # A tibble: 2 × 2
## year sd_G41H
## <dbl> <dbl>
## 1 2005 0.946
## 2 2015 1.03

# Box plot per year; the dashed crossbar marks the group mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_G41H, y = G41H, fill = year_factor_G41H)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of healthcare benefits (G41H) in 2005 and 2015",
    x = "Year",
    y = "Importance of healthcare benefits (G41H)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d between the 2005 and 2015 samples. Namespace-qualified because
# rstatix also exports a cohens_d().
effect_size_G41H_05_15 <- effectsize::cohens_d(
  data_2005$G41H,
  data_2015$G41H,
  paired = FALSE
)
effect_size_G41H_05_15
## Cohen's d | 95% CI
## ------------------------
## 0.16 | [0.00, 0.33]
##
## - Estimated using pooled SD.
# G41P (importance of job central to identity): response counts per year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(G41P)
## # A tibble: 15 × 3
## # Groups: year [2]
## year G41P n
## <dbl> <dbl> <int>
## 1 2005 1 11
## 2 2005 2 13
## 3 2005 3 32
## 4 2005 4 57
## 5 2005 5 92
## 6 2005 6 76
## 7 2005 7 66
## 8 2005 9 1
## 9 2015 1 11
## 10 2015 2 18
## 11 2015 3 15
## 12 2015 4 47
## 13 2015 5 64
## 14 2015 6 49
## 15 2015 7 50

# Keep only responses below 9; the table above shows a single 2005 row coded 9.
# NOTE(review): 9 is presumably a missing/refused code - confirm in the codebook.
Long_format_2005_2015_new_G41P <- Long_format_2005_2015_new %>%
  filter(G41P < 9)

# Remaining sample size per year.
Long_format_2005_2015_new_G41P %>%
  count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 347
## 2 2015 254

# Linear model of G41P on age, the recoded year, and their interaction,
# fitted on the filtered data.
G41P_Regression_05_15 <- lm(
  formula = G41P ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new_G41P
)
G41P_Regression_05_15
##
## Call:
## lm(formula = G41P ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new_G41P)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 5.101890 -0.007362 2.450055
## Age_18_graduate:year_new
## -0.135023

# Coefficient table and overall fit statistics.
summary(G41P_Regression_05_15)
##
## Call:
## lm(formula = G41P ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new_G41P)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0774 -0.9497 0.0503 1.0503 2.1534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.101890 2.499282 2.041 0.0417 *
## Age_18_graduate -0.007362 0.134480 -0.055 0.9564
## year_new 2.450055 2.499282 0.980 0.3273
## Age_18_graduate:year_new -0.135023 0.134480 -1.004 0.3158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.589 on 597 degrees of freedom
## Multiple R-squared: 0.002977, Adjusted R-squared: -0.002033
## F-statistic: 0.5941 on 3 and 597 DF, p-value: 0.619
# G41P: attach the model's fitted values and a factor version of the year to
# the filtered data frame.
Long_format_2005_2015_new_G41P$predicted_G41P <- predict(G41P_Regression_05_15)
Long_format_2005_2015_new_G41P$year_factor_G41P <- factor(
  Long_format_2005_2015_new_G41P$year
)

# Observed responses by age, with the fitted line for each year drawn on top.
ggplot(
  Long_format_2005_2015_new_G41P,
  aes(x = Age_18_graduate, y = G41P, color = factor(year_factor_G41P))
) +
  geom_point(aes(shape = year_factor_G41P), alpha = 0.5) +
  geom_line(aes(y = predicted_G41P), size = 1) +
  labs(
    title = "Importance of job central to identity (G41P) by Age and Year",
    x = "Age",
    y = "Importance of job central to identity (G41P)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test between years, assuming equal variances.
t_test_G41P <- t.test(
  G41P ~ year_factor_G41P,
  data = Long_format_2005_2015_new_G41P,
  var.equal = TRUE
)
t_test_G41P
##
## Two Sample t-test
##
## data: G41P by year_factor_G41P
## t = 0.86887, df = 599, p-value = 0.3853
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.1435382 0.3713174
## sample estimates:
## mean in group 2005 mean in group 2015
## 5.011527 4.897638

# Per-year mean of G41P.
mean_G41P_0515 <- Long_format_2005_2015_new_G41P %>%
  group_by(year) %>%
  summarize(average_G41P = mean(G41P, na.rm = TRUE)) %>%
  ungroup()
mean_G41P_0515
## # A tibble: 2 × 2
## year average_G41P
## <dbl> <dbl>
## 1 2005 5.01
## 2 2015 4.90

# Per-year standard deviation of G41P.
sd_G41P_0515 <- Long_format_2005_2015_new_G41P %>%
  group_by(year) %>%
  summarize(sd_G41P = sd(G41P, na.rm = TRUE)) %>%
  ungroup()
sd_G41P_0515
## # A tibble: 2 × 2
## year sd_G41P
## <dbl> <dbl>
## 1 2005 1.54
## 2 2015 1.65

# Box plot per year; the dashed crossbar marks the group mean.
ggplot(
  Long_format_2005_2015_new_G41P,
  aes(x = year_factor_G41P, y = G41P, fill = year_factor_G41P)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of Importance of job central to identity (G41P) in 2005 and 2015",
    x = "Year",
    y = "Importance of job central to identity (G41P)",
    fill = "Year"
  ) +
  theme_minimal()

# Per-year subsets with the same G41P < 9 restriction, used for the effect size.
data_2005_G41P <- Long_format_2005_2015_new %>%
  filter(year == 2005, G41P < 9)
data_2015_G41P <- Long_format_2005_2015_new %>%
  filter(year == 2015, G41P < 9)

# Cohen's d between the 2005 and 2015 samples. Namespace-qualified because
# rstatix also exports a cohens_d().
effect_size_G41P_05_15 <- effectsize::cohens_d(
  data_2005_G41P$G41P,
  data_2015_G41P$G41P,
  paired = FALSE
)
effect_size_G41P_05_15
## Cohen's d | 95% CI
## -------------------------
## 0.07 | [-0.09, 0.23]
##
## - Estimated using pooled SD.
# H1 (general health): response counts per year.
Long_format_2005_2015_new %>%
  group_by(year) %>%
  count(H1)
## # A tibble: 10 × 3
## # Groups: year [2]
## year H1 n
## <dbl> <dbl> <int>
## 1 2005 1 94
## 2 2005 2 145
## 3 2005 3 84
## 4 2005 4 23
## 5 2005 5 2
## 6 2015 1 68
## 7 2015 2 102
## 8 2015 3 59
## 9 2015 4 24
## 10 2015 5 1

# Linear model of H1 on age, the recoded year, and their interaction.
H1_Regression_05_15 <- lm(
  formula = H1 ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
  data = Long_format_2005_2015_new
)
H1_Regression_05_15
##
## Call:
## lm(formula = H1 ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Coefficients:
## (Intercept) Age_18_graduate year_new
## 2.33654 -0.01083 -1.78237
## Age_18_graduate:year_new
## 0.09726

# Coefficient table and overall fit statistics.
summary(H1_Regression_05_15)
##
## Call:
## lm(formula = H1 ~ Age_18_graduate + year_new + Age_18_graduate:year_new,
## data = Long_format_2005_2015_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1963 -1.0651 -0.1732 0.8268 2.9349
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.33654 1.44708 1.615 0.107
## Age_18_graduate -0.01083 0.07786 -0.139 0.889
## year_new -1.78237 1.44708 -1.232 0.219
## Age_18_graduate:year_new 0.09726 0.07786 1.249 0.212
##
## Residual standard error: 0.9204 on 598 degrees of freedom
## Multiple R-squared: 0.003433, Adjusted R-squared: -0.001567
## F-statistic: 0.6866 on 3 and 598 DF, p-value: 0.5604
# H1: attach the model's fitted values and a factor version of the year.
Long_format_2005_2015_new$predicted_H1 <- predict(H1_Regression_05_15)
Long_format_2005_2015_new$year_factor_H1 <- factor(
  Long_format_2005_2015_new$year
)

# Observed responses by age, with the fitted line for each year drawn on top.
ggplot(
  Long_format_2005_2015_new,
  aes(x = Age_18_graduate, y = H1, color = factor(year_factor_H1))
) +
  geom_point(aes(shape = year_factor_H1), alpha = 0.5) +
  geom_line(aes(y = predicted_H1), size = 1) +
  labs(
    title = "General Health (H1) by Age and Year",
    x = "Age",
    y = "General Health (H1)",
    color = "Year",
    shape = "Year"
  ) +
  theme_minimal()

# Two-sample t-test between years, assuming equal variances.
t_test_H1 <- t.test(
  H1 ~ year_factor_H1,
  data = Long_format_2005_2015_new,
  var.equal = TRUE
)
t_test_H1
##
## Two Sample t-test
##
## data: H1 by year_factor_H1
## t = -0.5882, df = 600, p-value = 0.5566
## alternative hypothesis: true difference in means between group 2005 and group 2015 is not equal to 0
## 95 percent confidence interval:
## -0.1937954 0.1044660
## sample estimates:
## mean in group 2005 mean in group 2015
## 2.120690 2.165354

# Per-year mean of H1.
mean_H1_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(average_H1 = mean(H1, na.rm = TRUE)) %>%
  ungroup()
mean_H1_0515
## # A tibble: 2 × 2
## year average_H1
## <dbl> <dbl>
## 1 2005 2.12
## 2 2015 2.17

# Per-year standard deviation of H1.
sd_H1_0515 <- Long_format_2005_2015_new %>%
  group_by(year) %>%
  summarize(sd_H1 = sd(H1, na.rm = TRUE)) %>%
  ungroup()
sd_H1_0515
## # A tibble: 2 × 2
## year sd_H1
## <dbl> <dbl>
## 1 2005 0.903
## 2 2015 0.943

# Box plot per year; the dashed crossbar marks the group mean.
ggplot(
  Long_format_2005_2015_new,
  aes(x = year_factor_H1, y = H1, fill = year_factor_H1)
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Box Plot of General Health (H1) in 2005 and 2015",
    x = "Year",
    y = "General Health (H1)",
    fill = "Year"
  ) +
  theme_minimal()

# Cohen's d between the 2005 and 2015 samples. Namespace-qualified because
# rstatix also exports a cohens_d().
effect_size_H1_05_15 <- effectsize::cohens_d(
  data_2005$H1,
  data_2015$H1,
  paired = FALSE
)
effect_size_H1_05_15
## Cohen's d | 95% CI
## -------------------------
## -0.05 | [-0.21, 0.11]
##
## - Estimated using pooled SD.