We will be going through
library(tidyverse)
library(readxl)
library(ggplot2)
library (reshape2)
library(writexl)
library (lmerTest)
library(lme4)
library(dplyr)
library(ggpubr)
library(rstatix)
library(effectsize)
# Read the TAS workbook, open it in the data viewer, and print its first rows.
TAS_data_long_format_age <- readxl::read_excel(
  path = "TAS_data_long_format_age.xlsx"
)
view(TAS_data_long_format_age)
utils::head(TAS_data_long_format_age)
## # A tibble: 6 × 42
## TAS TAS05 TAS09 TAS15 `1968 Interview Number` `Person Number` Gender
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 1 1 NA 4 180 2
## 2 2 1 1 NA 5 32 2
## 3 2 1 1 NA 6 34 1
## 4 2 1 1 NA 14 30 1
## 5 1 1 NA NA 18 38 2
## 6 2 1 1 NA 47 34 2
## # ℹ 35 more variables: `Individual is sample` <dbl>, `Year ID Number` <dbl>,
## # `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## # `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## # C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## # E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## # G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## # G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>, …
Filter the data (2005 & 2009)
# Build the 2005/2009 panel used by every paired comparison below.
#   1. Keep rows with year < 2010 where TAS05 and TAS09 both equal 1.
#   2. Merge the two identifier columns into one participant key, TAS_ID.
#   3. Code the wave as year_new (-1 = 2005, 0 = 2009, 1 = 2015).
#   4. Replace placeholder ages using the participant's other-wave age +/- 4.
#   5. Drop ages >= 100 and keep participants whose 2009 - 2005 age gap is
#      strictly between 2 and 6.
Long_format_2005_2009 <- TAS_data_long_format_age %>%
  filter(year < 2010, TAS05 == 1, TAS09 == 1) %>%
  unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>%
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  group_by(TAS_ID) %>%
  mutate(
    # NOTE(review): 2027 and 2023 look like placeholder codes for an unknown
    # age in the 2009 and 2005 rows respectively - confirm against the
    # script that derived Age_18_graduate.
    # dplyr::first() guarantees a length-1 value per participant: a missing
    # wave yields NA (removed by the filters below) instead of a size error,
    # and a duplicated wave uses its first row.
    Age_18_graduate = case_when(
      Age_18_graduate == 2027 ~
        dplyr::first(Age_18_graduate[year == 2005]) + 4,
      Age_18_graduate == 2023 ~
        dplyr::first(Age_18_graduate[year == 2009]) - 4,
      TRUE ~ Age_18_graduate
    )
  ) %>%
  ungroup() %>%
  # Removes placeholders that could not be repaired (both waves unknown).
  filter(Age_18_graduate < 100) %>%
  group_by(TAS_ID) %>%
  mutate(
    age_difference = dplyr::first(Age_18_graduate[year == 2009]) -
      dplyr::first(Age_18_graduate[year == 2005])
  ) %>%
  filter(age_difference > 2, age_difference < 6) %>%
  ungroup()
view(Long_format_2005_2009)
knitr::kable(head(Long_format_2005_2009[, 1:43]))
| TAS | TAS05 | TAS09 | TAS15 | TAS_ID | Gender | Individual is sample | Year ID Number | Sequence Number | Relationship to Head | Release Number | B5A | B5D | B6C | C2D | C2E | C2F | D2D3_month | D2D3_year | E1_1st_mention | E1_2nd_mention | E1_3rd_mention | E3 | G1 | G2_month | G2_year | G10 | G11 | G30A | G41A | G41B | G41C | G41H | G41P | H1 | L7_1st_mention | L7_2nd_mention | L7_3rd_mention | Age_17_graduate | Age_18_graduate | year | year_new | age_difference |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 1 | 1 | NA | 5_32 | 2 | 2 | 624 | 3 | 30 | 5 | 5 | 5 | 5 | 7 | 7 | 7 | 0 | 0 | 1 | 7 | 0 | 0 | 1 | 5 | 2002 | 1 | 1 | 7 | 7 | 6 | 6 | 7 | 5 | 2 | 1 | 0 | 0 | 20 | 21 | 2005 | -1 | 4 |
| 2 | 1 | 1 | NA | 6_34 | 1 | 2 | 1202 | 51 | 30 | 5 | 2 | 2 | 6 | 1 | 1 | 1 | 0 | 0 | 7 | 0 | 0 | 5 | 1 | 5 | 2002 | 1 | 1 | 0 | 7 | 5 | 7 | 5 | 3 | 1 | 1 | 0 | 0 | 20 | 21 | 2005 | -1 | 4 |
| 2 | 1 | 1 | NA | 14_30 | 1 | 2 | 736 | 51 | 30 | 5 | 4 | 4 | 4 | 2 | 1 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 6 | 2003 | 1 | 5 | 6 | 5 | 6 | 6 | 5 | 5 | 2 | 1 | 0 | 0 | 19 | 20 | 2005 | -1 | 4 |
| 2 | 1 | 1 | NA | 47_34 | 2 | 2 | 2516 | 3 | 30 | 5 | 4 | 5 | 6 | 4 | 5 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2005 | 5 | 0 | 6 | 3 | 6 | 4 | 7 | 4 | 1 | 1 | 0 | 0 | 17 | 18 | 2005 | -1 | 4 |
| 2 | 1 | 1 | NA | 53_35 | 2 | 2 | 1392 | 3 | 33 | 5 | 4 | 5 | 5 | 3 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 6 | 2002 | 1 | 1 | 7 | 6 | 7 | 7 | 7 | 5 | 1 | 1 | 0 | 0 | 20 | 21 | 2005 | -1 | 4 |
| 2 | 1 | 1 | NA | 53_36 | 2 | 2 | 1616 | 3 | 30 | 5 | 4 | 5 | 7 | 4 | 1 | 1 | 0 | 0 | 6 | 0 | 0 | 1 | 1 | 6 | 2005 | 1 | 5 | 7 | 7 | 7 | 5 | 7 | 6 | 2 | 1 | 0 | 0 | 17 | 18 | 2005 | -1 | 4 |
count(Long_format_2005_2009, year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 542
## 2 2009 542
count(Long_format_2005_2009, Age_18_graduate)
## # A tibble: 12 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 14 1
## 2 17 7
## 3 18 166
## 4 19 150
## 5 20 141
## 6 21 83
## 7 22 167
## 8 23 150
## 9 24 140
## 10 25 76
## 11 26 2
## 12 27 1
Age count - 2005
Long_format_2005_2009 %>% filter(year == 2005) %>% count(Age_18_graduate)
## # A tibble: 8 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 14 1
## 2 17 7
## 3 18 165
## 4 19 150
## 5 20 141
## 6 21 76
## 7 22 1
## 8 23 1
Age count - 2009
Long_format_2005_2009 %>% filter(year == 2009) %>% count(Age_18_graduate)
## # A tibble: 8 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 18 1
## 2 21 7
## 3 22 166
## 4 23 149
## 5 24 140
## 6 25 76
## 7 26 2
## 8 27 1
SPECIAL: G30A, G41P, H1 (2005 & 2009)
# Split the panel into its two waves. Both frames are sorted by TAS_ID so that
# row i refers to the same participant in each; the paired tests and paired
# effect sizes that use data_2005 / data_2009 match observations by position.
data_2005 <- Long_format_2005_2009 %>%
  filter(year_new == -1) %>%
  arrange(TAS_ID)
data_2009 <- Long_format_2005_2009 %>%
  filter(year_new == 0) %>%
  arrange(TAS_ID)
# Fail loudly rather than silently pairing different participants.
stopifnot(identical(data_2005$TAS_ID, data_2009$TAS_ID))

# Paired t-test of B5A, 2005 vs 2009.
B5A_t_test_05_09 <- t.test(data_2005$B5A, data_2009$B5A, paired = TRUE)
B5A_t_test_05_09
##
## Paired t-test
##
## data: data_2005$B5A and data_2009$B5A
## t = -14.895, df = 541, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.9376670 -0.7191595
## sample estimates:
## mean difference
## -0.8284133
# Frequency of each B5A response within each year.
Long_format_2005_2009 %>%
  group_by(year) %>%
  count(B5A)
## # A tibble: 10 × 3
## # Groups: year [2]
## year B5A n
## <dbl> <dbl> <int>
## 1 2005 1 25
## 2 2005 2 105
## 3 2005 3 119
## 4 2005 4 167
## 5 2005 5 126
## 6 2009 1 11
## 7 2009 2 29
## 8 2009 3 47
## 9 2009 4 146
## 10 2009 5 309
# Mean B5A per year, ignoring missing responses.
mean_B5A_0509 <- Long_format_2005_2009 %>%
  group_by(year) %>%
  summarise(average_B5A = mean(B5A, na.rm = TRUE)) %>%
  ungroup()
mean_B5A_0509
## # A tibble: 2 × 2
## year average_B5A
## <dbl> <dbl>
## 1 2005 3.49
## 2 2009 4.32
# Standard deviation of B5A per year, ignoring missing responses.
sd_B5A_0509 <- Long_format_2005_2009 %>%
  group_by(year) %>%
  summarise(sd_B5A = sd(B5A, na.rm = TRUE)) %>%
  ungroup()
sd_B5A_0509
## # A tibble: 2 × 2
## year sd_B5A
## <dbl> <dbl>
## 1 2005 1.18
## 2 2009 0.978
# Boxplot of B5A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
Long_format_2005_2009 %>%
  ggplot(aes(x = factor(year), y = B5A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Responsibility for Self (B5A) in 2005 and 2009",
    x = "Year",
    y = "Responsibility for Self (B5A)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Paired Cohen's d for B5A, 2005 vs 2009.
# Namespace-qualified because rstatix and effectsize both export cohens_d();
# an unqualified call depends on the order in which the packages are attached.
effect_size_B5A_05_09 <- effectsize::cohens_d(
  data_2005$B5A, data_2009$B5A,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_B5A_05_09
## Cohen's d | 95% CI
## --------------------------
## -0.64 | [-0.73, -0.55]
B5D_t_test_05_09 <- t.test(data_2005$B5D, data_2009$B5D, paired = TRUE)
B5D_t_test_05_09
##
## Paired t-test
##
## data: data_2005$B5D and data_2009$B5D
## t = -8.0405, df = 541, p-value = 5.668e-15
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.4499712 -0.2732761
## sample estimates:
## mean difference
## -0.3616236
Long_format_2005_2009 %>% group_by(year) %>% count(B5D)
## # A tibble: 10 × 3
## # Groups: year [2]
## year B5D n
## <dbl> <dbl> <int>
## 1 2005 1 9
## 2 2005 2 17
## 3 2005 3 59
## 4 2005 4 140
## 5 2005 5 317
## 6 2009 1 3
## 7 2009 2 9
## 8 2009 3 23
## 9 2009 4 64
## 10 2009 5 443
mean_B5D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_B5D = mean(B5D, na.rm = TRUE)) %>% ungroup()
mean_B5D_0509
## # A tibble: 2 × 2
## year average_B5D
## <dbl> <dbl>
## 1 2005 4.36
## 2 2009 4.73
sd_B5D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_B5D = sd(B5D, na.rm = TRUE)) %>% ungroup()
sd_B5D_0509
## # A tibble: 2 × 2
## year sd_B5D
## <dbl> <dbl>
## 1 2005 0.915
## 2 2009 0.672
# Boxplot of B5D by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
Long_format_2005_2009 %>%
  ggplot(aes(x = factor(year), y = B5D, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Managing own money (B5D) in 2005 and 2009",
    x = "Year",
    y = "Managing own money (B5D)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for B5D. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_B5D_05_09 <- effectsize::cohens_d(
  data_2005$B5D, data_2009$B5D,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_B5D_05_09
## Cohen's d | 95% CI
## --------------------------
## -0.35 | [-0.43, -0.26]
B6C_t_test_05_09 <- t.test(data_2005$B6C, data_2009$B6C, paired = TRUE)
B6C_t_test_05_09
##
## Paired t-test
##
## data: data_2005$B6C and data_2009$B6C
## t = -1.4667, df = 541, p-value = 0.143
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.20285228 0.02942055
## sample estimates:
## mean difference
## -0.08671587
Long_format_2005_2009 %>% group_by(year) %>% count(B6C)
## # A tibble: 14 × 3
## # Groups: year [2]
## year B6C n
## <dbl> <dbl> <int>
## 1 2005 1 9
## 2 2005 2 21
## 3 2005 3 30
## 4 2005 4 71
## 5 2005 5 163
## 6 2005 6 115
## 7 2005 7 133
## 8 2009 1 4
## 9 2009 2 10
## 10 2009 3 22
## 11 2009 4 88
## 12 2009 5 163
## 13 2009 6 134
## 14 2009 7 121
mean_B6C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_B6C = mean(B6C, na.rm = TRUE)) %>% ungroup()
mean_B6C_0509
## # A tibble: 2 × 2
## year average_B6C
## <dbl> <dbl>
## 1 2005 5.28
## 2 2009 5.37
sd_B6C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_B6C = sd(B6C, na.rm = TRUE)) %>% ungroup()
sd_B6C_0509
## # A tibble: 2 × 2
## year sd_B6C
## <dbl> <dbl>
## 1 2005 1.44
## 2 2009 1.27
# Boxplot of B6C by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
Long_format_2005_2009 %>%
  ggplot(aes(x = factor(year), y = B6C, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Money management skills (B6C) in 2005 and 2009",
    x = "Year",
    y = "Money management skills (B6C)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for B6C. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_B6C_05_09 <- effectsize::cohens_d(
  data_2005$B6C, data_2009$B6C,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_B6C_05_09
## Cohen's d | 95% CI
## -------------------------
## -0.06 | [-0.15, 0.02]
C2D_t_test_05_09 <- t.test(data_2005$C2D, data_2009$C2D, paired = TRUE)
C2D_t_test_05_09
##
## Paired t-test
##
## data: data_2005$C2D and data_2009$C2D
## t = -3.0212, df = 541, p-value = 0.002636
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.46582598 -0.09874967
## sample estimates:
## mean difference
## -0.2822878
Long_format_2005_2009 %>% group_by(year) %>% count(C2D)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2D n
## <dbl> <dbl> <int>
## 1 2005 1 82
## 2 2005 2 87
## 3 2005 3 96
## 4 2005 4 98
## 5 2005 5 81
## 6 2005 6 48
## 7 2005 7 50
## 8 2009 1 60
## 9 2009 2 85
## 10 2009 3 90
## 11 2009 4 97
## 12 2009 5 82
## 13 2009 6 62
## 14 2009 7 66
mean_C2D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_C2D = mean(C2D, na.rm = TRUE)) %>% ungroup()
mean_C2D_0509
## # A tibble: 2 × 2
## year average_C2D
## <dbl> <dbl>
## 1 2005 3.65
## 2 2009 3.93
sd_C2D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_C2D = sd(C2D, na.rm = TRUE)) %>% ungroup()
sd_C2D_0509
## # A tibble: 2 × 2
## year sd_C2D
## <dbl> <dbl>
## 1 2005 1.84
## 2 2009 1.87
# Boxplot of C2D by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = C2D, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Worry about expenses (C2D) in 2005 and 2009",
    x = "Year",
    y = "Worry about expenses (C2D)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for C2D. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_C2D_05_09 <- effectsize::cohens_d(
  data_2005$C2D, data_2009$C2D,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_C2D_05_09
## Cohen's d | 95% CI
## --------------------------
## -0.13 | [-0.21, -0.05]
C2E_t_test_05_09 <- t.test(data_2005$C2E, data_2009$C2E, paired = TRUE)
C2E_t_test_05_09
##
## Paired t-test
##
## data: data_2005$C2E and data_2009$C2E
## t = -2.5726, df = 541, p-value = 0.01036
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.42299611 -0.05670869
## sample estimates:
## mean difference
## -0.2398524
Long_format_2005_2009 %>% group_by(year) %>% count(C2E)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2E n
## <dbl> <dbl> <int>
## 1 2005 1 103
## 2 2005 2 101
## 3 2005 3 89
## 4 2005 4 65
## 5 2005 5 86
## 6 2005 6 59
## 7 2005 7 39
## 8 2009 1 75
## 9 2009 2 100
## 10 2009 3 90
## 11 2009 4 90
## 12 2009 5 64
## 13 2009 6 67
## 14 2009 7 56
mean_C2E_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_C2E = mean(C2E, na.rm = TRUE)) %>% ungroup()
mean_C2E_0509
## # A tibble: 2 × 2
## year average_C2E
## <dbl> <dbl>
## 1 2005 3.49
## 2 2009 3.73
sd_C2E_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_C2E = sd(C2E, na.rm = TRUE)) %>% ungroup()
sd_C2E_0509
## # A tibble: 2 × 2
## year sd_C2E
## <dbl> <dbl>
## 1 2005 1.90
## 2 2009 1.90
# Boxplot of C2E by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = C2E, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Worry about future job (C2E) in 2005 and 2009",
    x = "Year",
    y = "Worry about future job (C2E)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for C2E. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_C2E_05_09 <- effectsize::cohens_d(
  data_2005$C2E, data_2009$C2E,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_C2E_05_09
## Cohen's d | 95% CI
## --------------------------
## -0.11 | [-0.19, -0.03]
C2F_t_test_05_09 <- t.test(data_2005$C2F, data_2009$C2F, paired = TRUE)
C2F_t_test_05_09
##
## Paired t-test
##
## data: data_2005$C2F and data_2009$C2F
## t = -2.7143, df = 541, p-value = 0.006854
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.38799401 -0.06219049
## sample estimates:
## mean difference
## -0.2250923
Long_format_2005_2009 %>% group_by(year) %>% count(C2F)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2F n
## <dbl> <dbl> <int>
## 1 2005 1 125
## 2 2005 2 132
## 3 2005 3 93
## 4 2005 4 81
## 5 2005 5 56
## 6 2005 6 32
## 7 2005 7 23
## 8 2009 1 94
## 9 2009 2 128
## 10 2009 3 109
## 11 2009 4 71
## 12 2009 5 78
## 13 2009 6 38
## 14 2009 7 24
mean_C2F_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_C2F = mean(C2F, na.rm = TRUE)) %>% ungroup()
mean_C2F_0509
## # A tibble: 2 × 2
## year average_C2F
## <dbl> <dbl>
## 1 2005 3.00
## 2 2009 3.22
sd_C2F_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_C2F = sd(C2F, na.rm = TRUE)) %>% ungroup()
sd_C2F_0509
## # A tibble: 2 × 2
## year sd_C2F
## <dbl> <dbl>
## 1 2005 1.72
## 2 2009 1.71
# Boxplot of C2F by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = C2F, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Discouraged about future (C2F) in 2005 and 2009",
    x = "Year",
    y = "Discouraged about future (C2F)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for C2F. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_C2F_05_09 <- effectsize::cohens_d(
  data_2005$C2F, data_2009$C2F,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_C2F_05_09
## Cohen's d | 95% CI
## --------------------------
## -0.12 | [-0.20, -0.03]
Long_format_2005_2009 %>% group_by(year) %>% count(G30A)
## # A tibble: 15 × 3
## # Groups: year [2]
## year G30A n
## <dbl> <dbl> <int>
## 1 2005 0 56
## 2 2005 1 1
## 3 2005 2 1
## 4 2005 3 5
## 5 2005 4 26
## 6 2005 5 91
## 7 2005 6 180
## 8 2005 7 182
## 9 2009 1 3
## 10 2009 2 4
## 11 2009 3 5
## 12 2009 4 32
## 13 2009 5 136
## 14 2009 6 168
## 15 2009 7 194
Long_format_2005_2009 %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 542
## 2 2009 542
# Paired comparison of G30A, excluding participants with G30A coded 0.
# NOTE(review): 0 is presumably a "not applicable" code - confirm against the
# codebook.
# The exclusion covers a 0 in either wave, matching the symmetric
# `filter(G30A != 0)` used for the descriptive statistics of this variable
# (previously only a 0 in 2005 was handled).
remove_id <- Long_format_2005_2009 %>%
  filter(G30A == 0) %>%
  pull(TAS_ID) %>%
  unique()
# Both waves are sorted by TAS_ID so the paired test matches each participant
# with themselves rather than relying on incidental row order.
data_2005_G30A <- Long_format_2005_2009 %>%
  filter(year_new == -1, !(TAS_ID %in% remove_id)) %>%
  arrange(TAS_ID)
data_2009_G30A <- Long_format_2005_2009 %>%
  filter(year_new == 0, !(TAS_ID %in% remove_id)) %>%
  arrange(TAS_ID)
stopifnot(identical(data_2005_G30A$TAS_ID, data_2009_G30A$TAS_ID))
G30A_t_test_05_09 <- t.test(
  data_2005_G30A$G30A, data_2009_G30A$G30A,
  paired = TRUE
)
G30A_t_test_05_09
##
## Paired t-test
##
## data: data_2005_G30A$G30A and data_2009_G30A$G30A
## t = 2.5203, df = 485, p-value = 0.01205
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.03083453 0.24900086
## sample estimates:
## mean difference
## 0.1399177
Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% count(G30A) %>% ungroup()
## # A tibble: 14 × 3
## year G30A n
## <dbl> <dbl> <int>
## 1 2005 1 1
## 2 2005 2 1
## 3 2005 3 5
## 4 2005 4 26
## 5 2005 5 91
## 6 2005 6 180
## 7 2005 7 182
## 8 2009 1 3
## 9 2009 2 4
## 10 2009 3 5
## 11 2009 4 26
## 12 2009 5 127
## 13 2009 6 149
## 14 2009 7 172
Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 486
## 2 2009 486
mean_G30A_0509 <- Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G30A = mean(G30A, na.rm = TRUE)) %>% ungroup()
mean_G30A_0509
## # A tibble: 2 × 2
## year average_G30A
## <dbl> <dbl>
## 1 2005 6.03
## 2 2009 5.89
sd_G30A_0509 <- Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_G30A = sd(G30A, na.rm = TRUE)) %>% ungroup()
sd_G30A_0509
## # A tibble: 2 × 2
## year sd_G30A
## <dbl> <dbl>
## 1 2005 0.977
## 2 2009 1.09
# Boxplot of G30A by year for participants with a non-zero G30A in both years;
# the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
Long_format_2005_2009 %>%
  filter(G30A != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = G30A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Likelihood of well-paying job (G30A) in 2005 and 2009",
    x = "Year",
    y = "Likelihood of well-paying job (G30A)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G30A. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_G30A_05_09 <- effectsize::cohens_d(
  data_2005_G30A$G30A, data_2009_G30A$G30A,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G30A_05_09
## Cohen's d | 95% CI
## ------------------------
## 0.11 | [0.03, 0.20]
G41A_t_test_05_09 <- t.test(data_2005$G41A, data_2009$G41A, paired = TRUE)
G41A_t_test_05_09
##
## Paired t-test
##
## data: data_2005$G41A and data_2009$G41A
## t = 8.1936, df = 541, p-value = 1.841e-15
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.4614852 0.7525369
## sample estimates:
## mean difference
## 0.6070111
Long_format_2005_2009 %>% group_by(year) %>% count(G41A)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41A n
## <dbl> <dbl> <int>
## 1 2005 1 22
## 2 2005 2 24
## 3 2005 3 37
## 4 2005 4 61
## 5 2005 5 127
## 6 2005 6 117
## 7 2005 7 154
## 8 2009 1 48
## 9 2009 2 42
## 10 2009 3 42
## 11 2009 4 84
## 12 2009 5 140
## 13 2009 6 85
## 14 2009 7 101
mean_G41A_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41A = mean(G41A, na.rm = TRUE)) %>% ungroup()
mean_G41A_0509
## # A tibble: 2 × 2
## year average_G41A
## <dbl> <dbl>
## 1 2005 5.24
## 2 2009 4.63
sd_G41A_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41A = sd(G41A, na.rm = TRUE)) %>% ungroup()
sd_G41A_0509
## # A tibble: 2 × 2
## year sd_G41A
## <dbl> <dbl>
## 1 2005 1.65
## 2 2009 1.83
# Boxplot of G41A by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41A, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job status (G41A) in 2005 and 2009",
    x = "Year",
    y = "Importance of job status (G41A)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G41A. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_G41A_05_09 <- effectsize::cohens_d(
  data_2005$G41A, data_2009$G41A,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41A_05_09
## Cohen's d | 95% CI
## ------------------------
## 0.35 | [0.27, 0.44]
G41B_t_test_05_09 <- t.test(data_2005$G41B, data_2009$G41B, paired = TRUE)
G41B_t_test_05_09
##
## Paired t-test
##
## data: data_2005$G41B and data_2009$G41B
## t = 1.8469, df = 541, p-value = 0.0653
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.006920177 0.224632354
## sample estimates:
## mean difference
## 0.1088561
Long_format_2005_2009 %>% group_by(year) %>% count(G41B)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41B n
## <dbl> <dbl> <int>
## 1 2005 1 2
## 2 2005 2 5
## 3 2005 3 14
## 4 2005 4 41
## 5 2005 5 135
## 6 2005 6 198
## 7 2005 7 147
## 8 2009 1 7
## 9 2009 2 5
## 10 2009 3 14
## 11 2009 4 48
## 12 2009 5 153
## 13 2009 6 170
## 14 2009 7 145
mean_G41B_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41B = mean(G41B, na.rm = TRUE)) %>% ungroup()
mean_G41B_0509
## # A tibble: 2 × 2
## year average_G41B
## <dbl> <dbl>
## 1 2005 5.74
## 2 2009 5.63
sd_G41B_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41B = sd(G41B, na.rm = TRUE)) %>% ungroup()
sd_G41B_0509
## # A tibble: 2 × 2
## year sd_G41B
## <dbl> <dbl>
## 1 2005 1.11
## 2 2009 1.21
# Boxplot of G41B by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41B, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of decision-making (G41B) in 2005 and 2009",
    x = "Year",
    y = "Importance of decision-making (G41B)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G41B. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_G41B_05_09 <- effectsize::cohens_d(
  data_2005$G41B, data_2009$G41B,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41B_05_09
## Cohen's d | 95% CI
## -------------------------
## 0.08 | [-0.01, 0.16]
G41C_t_test_05_09 <- t.test(data_2005$G41C, data_2009$G41C, paired = TRUE)
G41C_t_test_05_09
##
## Paired t-test
##
## data: data_2005$G41C and data_2009$G41C
## t = -1.2232, df = 541, p-value = 0.2218
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.18270369 0.04248229
## sample estimates:
## mean difference
## -0.0701107
Long_format_2005_2009 %>% group_by(year) %>% count(G41C)
## # A tibble: 13 × 3
## # Groups: year [2]
## year G41C n
## <dbl> <dbl> <int>
## 1 2005 2 6
## 2 2005 3 21
## 3 2005 4 67
## 4 2005 5 160
## 5 2005 6 168
## 6 2005 7 120
## 7 2009 1 4
## 8 2009 2 8
## 9 2009 3 13
## 10 2009 4 52
## 11 2009 5 161
## 12 2009 6 171
## 13 2009 7 133
mean_G41C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41C = mean(G41C, na.rm = TRUE)) %>% ungroup()
mean_G41C_0509
## # A tibble: 2 × 2
## year average_G41C
## <dbl> <dbl>
## 1 2005 5.52
## 2 2009 5.59
sd_G41C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41C = sd(G41C, na.rm = TRUE)) %>% ungroup()
sd_G41C_0509
## # A tibble: 2 × 2
## year sd_G41C
## <dbl> <dbl>
## 1 2005 1.14
## 2 2009 1.18
# Boxplot of G41C by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41C, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of challenging work (G41C) in 2005 and 2009",
    x = "Year",
    y = "Importance of challenging work (G41C)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G41C. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_G41C_05_09 <- effectsize::cohens_d(
  data_2005$G41C, data_2009$G41C,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41C_05_09
## Cohen's d | 95% CI
## -------------------------
## -0.05 | [-0.14, 0.03]
G41H_t_test_05_09 <- t.test(data_2005$G41H, data_2009$G41H, paired = TRUE)
G41H_t_test_05_09
##
## Paired t-test
##
## data: data_2005$G41H and data_2009$G41H
## t = 1.4269, df = 541, p-value = 0.1542
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.0291852 0.1841668
## sample estimates:
## mean difference
## 0.07749077
Long_format_2005_2009 %>% group_by(year) %>% count(G41H)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41H n
## <dbl> <dbl> <int>
## 1 2005 1 1
## 2 2005 2 3
## 3 2005 3 4
## 4 2005 4 21
## 5 2005 5 56
## 6 2005 6 142
## 7 2005 7 315
## 8 2009 1 7
## 9 2009 2 5
## 10 2009 3 6
## 11 2009 4 22
## 12 2009 5 55
## 13 2009 6 129
## 14 2009 7 318
mean_G41H_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41H = mean(G41H, na.rm = TRUE)) %>% ungroup()
mean_G41H_0509
## # A tibble: 2 × 2
## year average_G41H
## <dbl> <dbl>
## 1 2005 6.35
## 2 2009 6.27
sd_G41H_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41H = sd(G41H, na.rm = TRUE)) %>% ungroup()
sd_G41H_0509
## # A tibble: 2 × 2
## year sd_G41H
## <dbl> <dbl>
## 1 2005 0.960
## 2 2009 1.16
# Boxplot of G41H by year; the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41H, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of healthcare benefits (G41H) in 2005 and 2009",
    x = "Year",
    y = "Importance of healthcare benefits (G41H)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G41H. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_G41H_05_09 <- effectsize::cohens_d(
  data_2005$G41H, data_2009$G41H,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41H_05_09
## Cohen's d | 95% CI
## -------------------------
## 0.06 | [-0.02, 0.15]
Long_format_2005_2009 %>% group_by(year) %>% count(G41P)
## # A tibble: 15 × 3
## # Groups: year [2]
## year G41P n
## <dbl> <dbl> <int>
## 1 2005 1 18
## 2 2005 2 18
## 3 2005 3 48
## 4 2005 4 94
## 5 2005 5 138
## 6 2005 6 123
## 7 2005 7 102
## 8 2005 9 1
## 9 2009 1 44
## 10 2009 2 56
## 11 2009 3 46
## 12 2009 4 117
## 13 2009 5 129
## 14 2009 6 72
## 15 2009 7 78
# Paired comparison of G41P, excluding participants with G41P coded 9.
# NOTE(review): 9 is presumably a missing/refused code - confirm against the
# codebook.
# The exclusion covers a 9 in either wave, matching the symmetric
# `filter(G41P != 9)` used for the descriptive statistics of this variable
# (previously only a 9 in 2005 was handled).
remove_id_G41P <- Long_format_2005_2009 %>%
  filter(G41P == 9) %>%
  pull(TAS_ID) %>%
  unique()
# Both waves are sorted by TAS_ID so the paired test matches each participant
# with themselves rather than relying on incidental row order.
data_2005_G41P <- Long_format_2005_2009 %>%
  filter(year_new == -1, !(TAS_ID %in% remove_id_G41P)) %>%
  arrange(TAS_ID)
data_2009_G41P <- Long_format_2005_2009 %>%
  filter(year_new == 0, !(TAS_ID %in% remove_id_G41P)) %>%
  arrange(TAS_ID)
stopifnot(identical(data_2005_G41P$TAS_ID, data_2009_G41P$TAS_ID))
G41P_t_test_05_09 <- t.test(
  data_2005_G41P$G41P, data_2009_G41P$G41P,
  paired = TRUE
)
G41P_t_test_05_09
##
## Paired t-test
##
## data: data_2005_G41P$G41P and data_2009_G41P$G41P
## t = 7.5301, df = 540, p-value = 2.144e-13
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.4576866 0.7807607
## sample estimates:
## mean difference
## 0.6192237
Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% count(G41P)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41P n
## <dbl> <dbl> <int>
## 1 2005 1 18
## 2 2005 2 18
## 3 2005 3 48
## 4 2005 4 94
## 5 2005 5 138
## 6 2005 6 123
## 7 2005 7 102
## 8 2009 1 44
## 9 2009 2 56
## 10 2009 3 46
## 11 2009 4 116
## 12 2009 5 129
## 13 2009 6 72
## 14 2009 7 78
Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 541
## 2 2009 541
mean_G41P_0509 <- Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G41P = mean(G41P, na.rm = TRUE)) %>% ungroup()
mean_G41P_0509
## # A tibble: 2 × 2
## year average_G41P
## <dbl> <dbl>
## 1 2005 5.02
## 2 2009 4.40
sd_G41P_0509 <- Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_G41P = sd(G41P, na.rm = TRUE)) %>% ungroup()
sd_G41P_0509
## # A tibble: 2 × 2
## year sd_G41P
## <dbl> <dbl>
## 1 2005 1.53
## 2 2009 1.77
# Boxplot of G41P by year for participants whose G41P is not 9 in both years;
# the dashed crossbar marks each year's mean.
# `linewidth` replaces `size`, which ggplot2 3.4.0 deprecated for line geoms.
Long_format_2005_2009 %>%
  filter(G41P != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = G41P, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job central to identity (G41P) in 2005 and 2009",
    x = "Year",
    y = "Importance of job central to identity (G41P)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G41P. Namespace-qualified because rstatix and effectsize
# both export cohens_d().
effect_size_G41P_05_09 <- effectsize::cohens_d(
  data_2005_G41P$G41P, data_2009_G41P$G41P,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41P_05_09
## Cohen's d | 95% CI
## ------------------------
## 0.32 | [0.24, 0.41]
Long_format_2005_2009 %>% group_by(year) %>% count(H1)
## # A tibble: 12 × 3
## # Groups: year [2]
## year H1 n
## <dbl> <dbl> <int>
## 1 2005 1 142
## 2 2005 2 222
## 3 2005 3 140
## 4 2005 4 33
## 5 2005 5 4
## 6 2005 9 1
## 7 2009 1 121
## 8 2009 2 234
## 9 2009 3 147
## 10 2009 4 37
## 11 2009 5 2
## 12 2009 9 1
# Paired comparison of H1, excluding participants with H1 coded 9 in either
# wave.
# NOTE(review): 9 is presumably a missing/refused code - confirm against the
# codebook.
remove_id_H1_05 <- Long_format_2005_2009 %>%
  filter(year_new == -1, H1 == 9) %>%
  pull(TAS_ID)
remove_id_H1_09 <- Long_format_2005_2009 %>%
  filter(year_new == 0, H1 == 9) %>%
  pull(TAS_ID)
# Apply the same ID-based exclusion to both waves and sort by TAS_ID, so the
# two frames hold the same participants in the same order for the paired test.
data_2005_H1 <- Long_format_2005_2009 %>%
  filter(
    year_new == -1,
    !(TAS_ID %in% c(remove_id_H1_05, remove_id_H1_09))
  ) %>%
  arrange(TAS_ID)
data_2009_H1 <- Long_format_2005_2009 %>%
  filter(
    year_new == 0,
    !(TAS_ID %in% c(remove_id_H1_05, remove_id_H1_09))
  ) %>%
  arrange(TAS_ID)
stopifnot(identical(data_2005_H1$TAS_ID, data_2009_H1$TAS_ID))
H1_t_test_05_09 <- t.test(data_2005_H1$H1, data_2009_H1$H1, paired = TRUE)
H1_t_test_05_09
##
## Paired t-test
##
## data: data_2005_H1$H1 and data_2009_H1$H1
## t = -1.3779, df = 539, p-value = 0.1688
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.13924890 0.02443409
## sample estimates:
## mean difference
## -0.05740741
Long_format_2005_2009 %>% filter(H1 != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% count(H1)
## # A tibble: 10 × 3
## # Groups: year [2]
## year H1 n
## <dbl> <dbl> <int>
## 1 2005 1 142
## 2 2005 2 222
## 3 2005 3 139
## 4 2005 4 33
## 5 2005 5 4
## 6 2009 1 121
## 7 2009 2 233
## 8 2009 3 147
## 9 2009 4 37
## 10 2009 5 2
Long_format_2005_2009 %>% filter(H1 != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2005 540
## 2 2009 540
mean_H1_0509 <- Long_format_2005_2009 %>% filter(H1 != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_H1 = mean(H1, na.rm = TRUE)) %>% ungroup()
mean_H1_0509
## # A tibble: 2 × 2
## year average_H1
## <dbl> <dbl>
## 1 2005 2.14
## 2 2009 2.20
sd_H1_0509 <- Long_format_2005_2009 %>% filter(H1 != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_H1 = sd(H1, na.rm = TRUE)) %>% ungroup()
sd_H1_0509
## # A tibble: 2 × 2
## year sd_H1
## <dbl> <dbl>
## 1 2005 0.902
## 2 2009 0.876
# Boxplot of H1 by wave for the complete pairs; the dashed crossbar is drawn
# at the per-wave mean.
Long_format_2005_2009 %>%
  filter(H1 != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = H1, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of General Health (H1) in 2005 and 2009",
    x = "Year",
    y = "General Health (H1)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()
# Paired Cohen's d on the same subsets used for the t-test. The namespace is
# spelled out because rstatix (attached earlier) also exports cohens_d().
effect_size_H1_05_09 <- effectsize::cohens_d(
  data_2005_H1$H1,
  data_2009_H1$H1,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_H1_05_09
## Cohen's d | 95% CI
## -------------------------
## -0.06 | [-0.14, 0.03]
Filter the data (2009 & 2015)
# Build the 2009-2015 panel: rows with TAS09 == 1 and TAS15 == 1, keyed by
# TAS_ID ("<1968 Interview Number>_<Person Number>").
# NOTE(review): 2033 and 2027 in Age_18_graduate look like placeholder values
# for an unknown age -- confirm against the upstream age derivation.
Long_format_2009_2015 <- TAS_data_long_format_age %>%
  filter(TAS09 == 1, TAS15 == 1) %>%
  unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>%
  # Wave index: 2005 -> -1, 2009 -> 0, 2015 -> 1.
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  group_by(TAS_ID) %>%
  # Per respondent: a 2033 becomes the 2009 value + 6, a 2027 becomes the
  # 2015 value - 6; everything else is left untouched.
  mutate(
    Age_18_graduate = case_when(
      Age_18_graduate == 2033 ~ Age_18_graduate[year == 2009] + 6,
      Age_18_graduate == 2027 ~ Age_18_graduate[year == 2015] - 6,
      TRUE ~ Age_18_graduate
    )
  ) %>%
  ungroup() %>%
  # Drop rows whose age is still 100 or more after the replacement above.
  filter(Age_18_graduate < 100) %>%
  group_by(TAS_ID) %>%
  mutate(
    age_difference =
      Age_18_graduate[year == 2015] - Age_18_graduate[year == 2009]
  ) %>%
  # Keep respondents whose age gap between waves lies strictly between 4 and 8.
  filter(age_difference < 8, age_difference > 4) %>%
  ungroup()
view(Long_format_2009_2015)
knitr::kable(head(Long_format_2009_2015[, 1:43]))
| TAS | TAS05 | TAS09 | TAS15 | TAS_ID | Gender | Individual is sample | Year ID Number | Sequence Number | Relationship to Head | Release Number | B5A | B5D | B6C | C2D | C2E | C2F | D2D3_month | D2D3_year | E1_1st_mention | E1_2nd_mention | E1_3rd_mention | E3 | G1 | G2_month | G2_year | G10 | G11 | G30A | G41A | G41B | G41C | G41H | G41P | H1 | L7_1st_mention | L7_2nd_mention | L7_3rd_mention | Age_17_graduate | Age_18_graduate | year | year_new | age_difference |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | NA | 1 | 1 | 4_39 | 2 | 2 | 13 | 3 | 60 | 3 | 4 | 5 | 4 | 6 | 7 | 5 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2008 | 1 | 5 | 5 | 6 | 5 | 2 | 7 | 6 | 2 | 1 | 0 | 0 | 18 | 19 | 2009 | 0 | 6 |
| 2 | NA | 1 | 1 | 7_40 | 2 | 2 | 3836 | 2 | 22 | 3 | 2 | 2 | 7 | 7 | 3 | 4 | 0 | 0 | 6 | 0 | 0 | 5 | 1 | 6 | 2007 | 5 | 0 | 5 | 5 | 2 | 5 | 6 | 5 | 3 | 1 | 0 | 0 | 19 | 20 | 2009 | 0 | 6 |
| 2 | NA | 1 | 1 | 7_41 | 1 | 2 | 576 | 2 | 30 | 3 | 3 | 4 | 7 | 4 | 5 | 4 | 0 | 0 | 3 | 0 | 0 | 5 | 1 | 5 | 2009 | 5 | 0 | 7 | 5 | 6 | 5 | 7 | 5 | 2 | 1 | 0 | 0 | 17 | 18 | 2009 | 0 | 6 |
| 2 | NA | 1 | 1 | 10_34 | 2 | 2 | 3276 | 3 | 30 | 3 | 4 | 5 | 6 | 4 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 6 | 2008 | 1 | 5 | 7 | 7 | 5 | 4 | 7 | 5 | 2 | 2 | 0 | 0 | 18 | 19 | 2009 | 0 | 6 |
| 2 | NA | 1 | 1 | 14_31 | 2 | 2 | 713 | 1 | 10 | 3 | 5 | 5 | 7 | 4 | 4 | 4 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 6 | 2005 | 5 | 0 | 6 | 5 | 7 | 6 | 7 | 2 | 4 | 1 | 0 | 0 | 21 | 22 | 2009 | 0 | 6 |
| 2 | NA | 1 | 1 | 22_30 | 2 | 2 | 907 | 2 | 30 | 3 | 5 | 1 | 4 | 3 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2006 | 1 | 1 | 7 | 6 | 6 | 6 | 6 | 6 | 1 | 2 | 0 | 0 | 20 | 21 | 2009 | 0 | 6 |
# Rows per wave in the 2009-2015 panel.
Long_format_2009_2015 %>%
  count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2009 515
## 2 2015 515
# Distribution of Age_18_graduate across both waves of the 2009-2015 panel.
Long_format_2009_2015 %>%
  count(Age_18_graduate)
## # A tibble: 11 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 15 1
## 2 18 154
## 3 19 134
## 4 20 136
## 5 21 89
## 6 22 2
## 7 24 155
## 8 25 134
## 9 26 136
## 10 27 87
## 11 28 2
Age count - 2009
# Distribution of Age_18_graduate in the 2009 wave only.
count(
  filter(Long_format_2009_2015, year == 2009),
  Age_18_graduate
)
## # A tibble: 6 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 15 1
## 2 18 154
## 3 19 134
## 4 20 136
## 5 21 88
## 6 22 2
Age count - 2015
# Distribution of Age_18_graduate in the 2015 wave only.
count(
  filter(Long_format_2009_2015, year == 2015),
  Age_18_graduate
)
## # A tibble: 6 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 21 1
## 2 24 155
## 3 25 134
## 4 26 136
## 5 27 87
## 6 28 2
# Split the 2009-2015 panel into one data frame per wave (year_new == 0 is
# 2009, year_new == 1 is 2015). These feed every `t.test(..., paired = TRUE)`
# and `cohens_d(..., paired = TRUE)` call that takes two plain vectors, and
# those functions pair observations purely by position. Sorting both subsets
# by TAS_ID makes row i refer to the same respondent in each wave instead of
# relying on the row order of the input spreadsheet.
data_2009 <- Long_format_2009_2015 %>%
  filter(year_new == 0) %>%
  arrange(TAS_ID)
data_2015 <- Long_format_2009_2015 %>%
  filter(year_new == 1) %>%
  arrange(TAS_ID)
# Fail loudly rather than silently pairing different respondents.
stopifnot(identical(data_2009$TAS_ID, data_2015$TAS_ID))
SPECIAL (variables with invalid response codes that are excluded pairwise): B5A, B5D, G41A, G41C, G41P (2009 & 2015)
# Raw B5A answer distribution per wave, before any codes are excluded.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(B5A)
## # A tibble: 12 × 3
## # Groups: year [2]
## year B5A n
## <dbl> <dbl> <int>
## 1 2009 1 34
## 2 2009 2 94
## 3 2009 3 113
## 4 2009 4 157
## 5 2009 5 116
## 6 2009 9 1
## 7 2015 1 12
## 8 2015 2 15
## 9 2015 3 35
## 10 2015 4 92
## 11 2015 5 359
## 12 2015 8 2
# Paired t-test of B5A (responsibility for self): 2009 vs 2015.
# Answers below 7 are kept; anything else (a code of 7 or more, or NA) is
# treated as invalid, and a respondent who is invalid in either wave is
# dropped from both waves.
# NOTE(review): 8 and 9 are presumably "don't know"/"refused" -- confirm.
#
# Fix: the exclusion lists used `B5A > 7` while the keep-filter used
# `B5A < 7`, so a value of exactly 7 (or an NA) would have been removed from
# one wave only, leaving the two vectors mismatched. Both sides now apply the
# same rule, and each subset is ordered by TAS_ID so that row i is the same
# respondent in both waves (the paired test matches rows by position).
remove_id_B5A_09 <- Long_format_2009_2015 %>%
  filter(year_new == 0, is.na(B5A) | B5A >= 7) %>%
  pull(TAS_ID)
remove_id_B5A_15 <- Long_format_2009_2015 %>%
  filter(year_new == 1, is.na(B5A) | B5A >= 7) %>%
  pull(TAS_ID)
data_2009_B5A <- Long_format_2009_2015 %>%
  filter(year_new == 0, B5A < 7, !TAS_ID %in% remove_id_B5A_15) %>%
  arrange(TAS_ID)
data_2015_B5A <- Long_format_2009_2015 %>%
  filter(year_new == 1, B5A < 7, !TAS_ID %in% remove_id_B5A_09) %>%
  arrange(TAS_ID)
B5A_t_test_09_15 <- t.test(data_2009_B5A$B5A, data_2015_B5A$B5A, paired = TRUE)
B5A_t_test_09_15
##
## Paired t-test
##
## data: data_2009_B5A$B5A and data_2015_B5A$B5A
## t = -17.111, df = 511, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.1823174 -0.9387764
## sample estimates:
## mean difference
## -1.060547
# B5A answer distribution per wave among respondents who keep a row in both
# waves after the B5A >= 7 rows are removed.
Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(B5A)
## # A tibble: 10 × 3
## # Groups: year [2]
## year B5A n
## <dbl> <dbl> <int>
## 1 2009 1 34
## 2 2009 2 94
## 3 2009 3 111
## 4 2009 4 157
## 5 2009 5 116
## 6 2015 1 12
## 7 2015 2 15
## 8 2015 3 35
## 9 2015 4 91
## 10 2015 5 359
# Number of rows per wave among the complete B5A pairs.
Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2009 512
## 2 2015 512
# Mean B5A per wave over the complete pairs (rows with B5A >= 7 removed).
mean_B5A_0915 <- Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(average_B5A = mean(B5A, na.rm = TRUE)) %>%
  ungroup()
mean_B5A_0915
## # A tibble: 2 × 2
## year average_B5A
## <dbl> <dbl>
## 1 2009 3.44
## 2 2015 4.50
# Standard deviation of B5A per wave over the complete pairs.
sd_B5A_0915 <- Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_B5A = sd(B5A, na.rm = TRUE)) %>%
  ungroup()
sd_B5A_0915
## # A tibble: 2 × 2
## year sd_B5A
## <dbl> <dbl>
## 1 2009 1.21
## 2 2015 0.919
# Boxplot of B5A by wave for the complete pairs; the dashed crossbar is drawn
# at the per-wave mean.
Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = B5A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Responsibility for Self (B5A) in 2009 and 2015",
    x = "Year",
    y = "Responsibility for Self (B5A)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()
# Paired Cohen's d on the subsets used for the t-test. The namespace is
# spelled out because rstatix (attached earlier) also exports cohens_d().
effect_size_B5A_09_15 <- effectsize::cohens_d(
  data_2009_B5A$B5A,
  data_2015_B5A$B5A,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_B5A_09_15
## Cohen's d | 95% CI
## --------------------------
## -0.76 | [-0.85, -0.66]
# Raw B5D answer distribution per wave, before any codes are excluded.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(B5D)
## # A tibble: 11 × 3
## # Groups: year [2]
## year B5D n
## <dbl> <dbl> <int>
## 1 2009 1 16
## 2 2009 2 15
## 3 2009 3 41
## 4 2009 4 123
## 5 2009 5 319
## 6 2009 9 1
## 7 2015 1 4
## 8 2015 2 12
## 9 2015 3 19
## 10 2015 4 43
## 11 2015 5 437
# Build the paired B5D (managing own money) samples for 2009 vs 2015.
# Respondents whose 2009 answer is coded 9 are removed from both waves; the
# per-wave counts above show no 9 in 2015, so only 2009 is screened.
# NOTE(review): 9 is presumably a missing/refused code -- confirm in codebook.
remove_id_B5D <- Long_format_2009_2015 %>%
  filter(year_new == 0, B5D == 9) %>%
  pull(TAS_ID)
# Fix: the 2009 subset used to filter on `B5A != 9` (wrong column). That
# dropped the respondent with B5A == 9 rather than the one with B5D == 9, so
# the 2009 vector kept an invalid 9 and no longer lined up row-for-row with
# the 2015 vector. The t-test and Cohen's d output printed further down was
# produced with the old filter (its mean difference, -0.346, does not equal
# the difference of the paired means, 4.39 - 4.75) and must be regenerated.
data_2009_B5D <- Long_format_2009_2015 %>%
  filter(year_new == 0, B5D != 9) %>%
  arrange(TAS_ID)
data_2015_B5D <- Long_format_2009_2015 %>%
  filter(year_new == 1, !TAS_ID %in% remove_id_B5D) %>%
  arrange(TAS_ID)
# The paired test matches rows by position, so both subsets must contain the
# same respondents in the same order.
stopifnot(identical(data_2009_B5D$TAS_ID, data_2015_B5D$TAS_ID))
# B5D answer distribution per wave among the complete pairs.
Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  count(B5D)
## # A tibble: 10 × 3
## # Groups: year [2]
## year B5D n
## <dbl> <dbl> <int>
## 1 2009 1 16
## 2 2009 2 15
## 3 2009 3 41
## 4 2009 4 123
## 5 2009 5 319
## 6 2015 1 4
## 7 2015 2 12
## 8 2015 3 18
## 9 2015 4 43
## 10 2015 5 437
# Number of rows per wave among the complete B5D pairs (B5D == 9 removed).
Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2009 514
## 2 2015 514
# Paired t-test of B5D (managing own money): 2009 vs 2015, using the
# B5D-specific subsets built above. Rows are paired by position.
B5D_t_test_09_15 <- t.test(
  x = data_2009_B5D$B5D,
  y = data_2015_B5D$B5D,
  paired = TRUE
)
B5D_t_test_09_15
##
## Paired t-test
##
## data: data_2009_B5D$B5D and data_2015_B5D$B5D
## t = -6.8478, df = 513, p-value = 2.151e-11
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.445656 -0.246951
## sample estimates:
## mean difference
## -0.3463035
# Mean B5D per wave over the complete pairs (B5D == 9 rows removed first).
mean_B5D_0915 <- Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(average_B5D = mean(B5D, na.rm = TRUE)) %>%
  ungroup()
mean_B5D_0915
## # A tibble: 2 × 2
## year average_B5D
## <dbl> <dbl>
## 1 2009 4.39
## 2 2015 4.75
# Standard deviation of B5D per wave over the complete pairs.
sd_B5D_0915 <- Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_B5D = sd(B5D, na.rm = TRUE)) %>%
  ungroup()
sd_B5D_0915
## # A tibble: 2 × 2
## year sd_B5D
## <dbl> <dbl>
## 1 2009 0.974
## 2 2015 0.703
# Boxplot of B5D by wave for the complete pairs; the dashed crossbar is drawn
# at the per-wave mean.
Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = B5D, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Managing own money (B5D) in 2009 and 2015",
    x = "Year",
    y = "Managing own money (B5D)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()
# Paired Cohen's d on the subsets used for the t-test. The namespace is
# spelled out because rstatix (attached earlier) also exports cohens_d().
effect_size_B5D_09_15 <- effectsize::cohens_d(
  data_2009_B5D$B5D,
  data_2015_B5D$B5D,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_B5D_09_15
## Cohen's d | 95% CI
## --------------------------
## -0.30 | [-0.39, -0.21]
# Paired t-test of B6C (money management skills): 2009 vs 2015, using the
# full per-wave data frames. Rows are paired by position.
B6C_t_test_09_15 <- t.test(
  x = data_2009$B6C,
  y = data_2015$B6C,
  paired = TRUE
)
B6C_t_test_09_15
##
## Paired t-test
##
## data: data_2009$B6C and data_2015$B6C
## t = 1.5235, df = 514, p-value = 0.1282
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.02586079 0.20450156
## sample estimates:
## mean difference
## 0.08932039
# B6C answer distribution per wave.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(B6C)
## # A tibble: 14 × 3
## # Groups: year [2]
## year B6C n
## <dbl> <dbl> <int>
## 1 2009 1 4
## 2 2009 2 11
## 3 2009 3 17
## 4 2009 4 62
## 5 2009 5 154
## 6 2009 6 139
## 7 2009 7 128
## 8 2015 1 4
## 9 2015 2 7
## 10 2015 3 27
## 11 2015 4 74
## 12 2015 5 140
## 13 2015 6 157
## 14 2015 7 106
# Mean B6C per wave over the whole 2009-2015 panel.
mean_B6C_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_B6C = mean(B6C, na.rm = TRUE)) %>%
  ungroup()
mean_B6C_0915
## # A tibble: 2 × 2
## year average_B6C
## <dbl> <dbl>
## 1 2009 5.49
## 2 2015 5.40
# Standard deviation of B6C per wave over the whole 2009-2015 panel.
sd_B6C_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_B6C = sd(B6C, na.rm = TRUE)) %>%
  ungroup()
sd_B6C_0915
## # A tibble: 2 × 2
## year sd_B6C
## <dbl> <dbl>
## 1 2009 1.27
## 2 2015 1.25
# Boxplot of B6C by wave; the dashed crossbar is drawn at the per-wave mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = B6C, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", size = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Money management skills (B6C) in 2009 and 2015",
    x = "Year",
    y = "Money management skills (B6C)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()
# Paired Cohen's d on the per-wave data frames. The namespace is spelled out
# because rstatix (attached earlier) also exports cohens_d().
effect_size_B6C_09_15 <- effectsize::cohens_d(
  data_2009$B6C,
  data_2015$B6C,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_B6C_09_15
## Cohen's d | 95% CI
## -------------------------
## 0.07 | [-0.02, 0.15]
# Paired t-test of C2D (worry about expenses): 2009 vs 2015, using the full
# per-wave data frames. Rows are paired by position.
C2D_t_test_09_15 <- t.test(
  x = data_2009$C2D,
  y = data_2015$C2D,
  paired = TRUE
)
C2D_t_test_09_15
##
## Paired t-test
##
## data: data_2009$C2D and data_2015$C2D
## t = 3.2447, df = 514, p-value = 0.001252
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.1256330 0.5112602
## sample estimates:
## mean difference
## 0.3184466
Long_format_2009_2015 %>% group_by(year) %>% count(C2D)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2D n
## <dbl> <dbl> <int>
## 1 2009 1 77
## 2 2009 2 97
## 3 2009 3 65
## 4 2009 4 89
## 5 2009 5 79
## 6 2009 6 49
## 7 2009 7 59
## 8 2015 1 98
## 9 2015 2 99
## 10 2015 3 84
## 11 2015 4 78
## 12 2015 5 73
## 13 2015 6 46
## 14 2015 7 37
mean_C2D_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(average_C2D = mean(C2D, na.rm = TRUE)) %>% ungroup()
mean_C2D_0915
## # A tibble: 2 × 2
## year average_C2D
## <dbl> <dbl>
## 1 2009 3.74
## 2 2015 3.42
sd_C2D_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(sd_C2D = sd(C2D, na.rm = TRUE)) %>% ungroup()
sd_C2D_0915
## # A tibble: 2 × 2
## year sd_C2D
## <dbl> <dbl>
## 1 2009 1.93
## 2 2015 1.86
ggplot(Long_format_2009_2015, aes(x = factor(year), y = C2D, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Worry about expenses (C2D) in 2009 and 2015", x = "Year", y = "Worry about expenses (C2D)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_C2D_09_15 <- cohens_d(data_2009$C2D, data_2015$C2D, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_C2D_09_15
## Cohen's d | 95% CI
## ------------------------
## 0.14 | [0.06, 0.23]
# Paired t-test of C2E (worry about future job): 2009 vs 2015, using the full
# per-wave data frames. Rows are paired by position.
C2E_t_test_09_15 <- t.test(
  x = data_2009$C2E,
  y = data_2015$C2E,
  paired = TRUE
)
C2E_t_test_09_15
##
## Paired t-test
##
## data: data_2009$C2E and data_2015$C2E
## t = 5.1874, df = 514, p-value = 3.073e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.2907333 0.6451890
## sample estimates:
## mean difference
## 0.4679612
Long_format_2009_2015 %>% group_by(year) %>% count(C2E)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2E n
## <dbl> <dbl> <int>
## 1 2009 1 89
## 2 2009 2 89
## 3 2009 3 77
## 4 2009 4 86
## 5 2009 5 76
## 6 2009 6 36
## 7 2009 7 62
## 8 2015 1 109
## 9 2015 2 111
## 10 2015 3 96
## 11 2015 4 74
## 12 2015 5 67
## 13 2015 6 25
## 14 2015 7 33
mean_C2E_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(average_C2E = mean(C2E, na.rm = TRUE)) %>% ungroup()
mean_C2E_0915
## # A tibble: 2 × 2
## year average_C2E
## <dbl> <dbl>
## 1 2009 3.63
## 2 2015 3.17
sd_C2E_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(sd_C2E = sd(C2E, na.rm = TRUE)) %>% ungroup()
sd_C2E_0915
## # A tibble: 2 × 2
## year sd_C2E
## <dbl> <dbl>
## 1 2009 1.94
## 2 2015 1.78
ggplot(Long_format_2009_2015, aes(x = factor(year), y = C2E, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Worry about future job (C2E) in 2009 and 2015", x = "Year", y = "Worry about future job (C2E)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_C2E_09_15 <- cohens_d(data_2009$C2E, data_2015$C2E, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_C2E_09_15
## Cohen's d | 95% CI
## ------------------------
## 0.23 | [0.14, 0.32]
# Paired t-test of C2F (discouraged about future): 2009 vs 2015, using the
# full per-wave data frames. Rows are paired by position.
C2F_t_test_09_15 <- t.test(
  x = data_2009$C2F,
  y = data_2015$C2F,
  paired = TRUE
)
C2F_t_test_09_15
##
## Paired t-test
##
## data: data_2009$C2F and data_2015$C2F
## t = 2.8413, df = 514, p-value = 0.004672
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.07009728 0.38427165
## sample estimates:
## mean difference
## 0.2271845
Long_format_2009_2015 %>% group_by(year) %>% count(C2F)
## # A tibble: 14 × 3
## # Groups: year [2]
## year C2F n
## <dbl> <dbl> <int>
## 1 2009 1 108
## 2 2009 2 121
## 3 2009 3 96
## 4 2009 4 87
## 5 2009 5 47
## 6 2009 6 27
## 7 2009 7 29
## 8 2015 1 123
## 9 2015 2 144
## 10 2015 3 87
## 11 2015 4 71
## 12 2015 5 48
## 13 2015 6 21
## 14 2015 7 21
mean_C2F_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(average_C2F = mean(C2F, na.rm = TRUE)) %>% ungroup()
mean_C2F_0915
## # A tibble: 2 × 2
## year average_C2F
## <dbl> <dbl>
## 1 2009 3.08
## 2 2015 2.85
sd_C2F_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(sd_C2F = sd(C2F, na.rm = TRUE)) %>% ungroup()
sd_C2F_0915
## # A tibble: 2 × 2
## year sd_C2F
## <dbl> <dbl>
## 1 2009 1.73
## 2 2015 1.66
ggplot(Long_format_2009_2015, aes(x = factor(year), y =C2F, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Discouraged about future (C2F) in 2009 and 2015", x = "Year", y = "Discouraged about future (C2F)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_C2F_09_15 <- cohens_d(data_2009$C2F, data_2015$C2F, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_C2F_09_15
## Cohen's d | 95% CI
## ------------------------
## 0.13 | [0.04, 0.21]
# Paired t-test of G30A (likelihood of well-paying job): 2009 vs 2015, using
# the full per-wave data frames. Rows are paired by position.
G30A_t_test_09_15 <- t.test(
  x = data_2009$G30A,
  y = data_2015$G30A,
  paired = TRUE
)
G30A_t_test_09_15
##
## Paired t-test
##
## data: data_2009$G30A and data_2015$G30A
## t = 0.78506, df = 514, p-value = 0.4328
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.06126544 0.14281884
## sample estimates:
## mean difference
## 0.0407767
Long_format_2009_2015 %>% group_by(year) %>% count(G30A)
## # A tibble: 13 × 3
## # Groups: year [2]
## year G30A n
## <dbl> <dbl> <int>
## 1 2009 1 3
## 2 2009 3 3
## 3 2009 4 25
## 4 2009 5 108
## 5 2009 6 171
## 6 2009 7 205
## 7 2015 1 3
## 8 2015 2 4
## 9 2015 3 4
## 10 2015 4 27
## 11 2015 5 112
## 12 2015 6 154
## 13 2015 7 211
mean_G30A_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(average_G30A = mean(G30A, na.rm = TRUE)) %>% ungroup()
mean_G30A_0915
## # A tibble: 2 × 2
## year average_G30A
## <dbl> <dbl>
## 1 2009 6.04
## 2 2015 6.00
sd_G30A_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(sd_G30A = sd(G30A, na.rm = TRUE)) %>% ungroup()
sd_G30A_0915
## # A tibble: 2 × 2
## year sd_G30A
## <dbl> <dbl>
## 1 2009 1.00
## 2 2015 1.09
ggplot(Long_format_2009_2015, aes(x = factor(year), y =G30A, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Likelihood of well-paying job (G30A) in 2009 and 2015", x = "Year", y = "Likelihood of well-paying job (G30A)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_G30A_09_15 <- cohens_d(data_2009$G30A, data_2015$G30A, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G30A_09_15
## Cohen's d | 95% CI
## -------------------------
## 0.03 | [-0.05, 0.12]
Long_format_2009_2015 %>% group_by(year) %>% count(G41A)
## # A tibble: 15 × 3
## # Groups: year [2]
## year G41A n
## <dbl> <dbl> <int>
## 1 2009 1 30
## 2 2009 2 14
## 3 2009 3 39
## 4 2009 4 70
## 5 2009 5 128
## 6 2009 6 89
## 7 2009 7 144
## 8 2009 9 1
## 9 2015 1 54
## 10 2015 2 50
## 11 2015 3 50
## 12 2015 4 73
## 13 2015 5 115
## 14 2015 6 75
## 15 2015 7 98
# Build the paired G41A (importance of job status) samples for 2009 vs 2015.
# Respondents whose 2009 answer is coded 9 are removed from both waves; the
# per-wave counts above show no 9 in 2015, so only 2009 is screened.
# NOTE(review): 9 is presumably a missing/refused code -- confirm in codebook.
remove_id_G41A <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41A == 9) %>%
  pull(TAS_ID)
data_2009_G41A <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41A != 9)
data_2015_G41A <- Long_format_2009_2015 %>%
  filter(year_new == 1, !TAS_ID %in% remove_id_G41A)
# G41A answer distribution per wave among the complete pairs.
Long_format_2009_2015 %>%
  filter(G41A != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(G41A)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41A n
## <dbl> <dbl> <int>
## 1 2009 1 30
## 2 2009 2 14
## 3 2009 3 39
## 4 2009 4 70
## 5 2009 5 128
## 6 2009 6 89
## 7 2009 7 144
## 8 2015 1 54
## 9 2015 2 50
## 10 2015 3 50
## 11 2015 4 73
## 12 2015 5 115
## 13 2015 6 74
## 14 2015 7 98
Long_format_2009_2015 %>% filter(G41A != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2009 514
## 2 2015 514
# Paired t-test of G41A (importance of job status): 2009 vs 2015, using the
# G41A-specific subsets. Rows are paired by position.
G41A_t_test_09_15 <- t.test(
  x = data_2009_G41A$G41A,
  y = data_2015_G41A$G41A,
  paired = TRUE
)
G41A_t_test_09_15
##
## Paired t-test
##
## data: data_2009_G41A$G41A and data_2015_G41A$G41A
## t = 8.2779, df = 513, p-value = 1.093e-15
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.4985536 0.8088394
## sample estimates:
## mean difference
## 0.6536965
mean_G41A_0915 <- Long_format_2009_2015 %>% filter(G41A != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G41A = mean(G41A, na.rm = TRUE)) %>% ungroup()
mean_G41A_0915
## # A tibble: 2 × 2
## year average_G41A
## <dbl> <dbl>
## 1 2009 5.13
## 2 2015 4.48
sd_G41A_0915 <- Long_format_2009_2015 %>% filter(G41A != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_G41A = sd(G41A, na.rm = TRUE)) %>% ungroup()
sd_G41A_0915
## # A tibble: 2 × 2
## year sd_G41A
## <dbl> <dbl>
## 1 2009 1.70
## 2 2015 1.93
Long_format_2009_2015 %>% filter(G41A != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% ggplot(aes(x = factor(year), y = G41A, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Importance of job status (G41A) in 2009 and 2015", x = "Year", y = "Importance of job status (G41A)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_G41A_09_15 <- cohens_d(data_2009_G41A$G41A, data_2015_G41A$G41A, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41A_09_15
## Cohen's d | 95% CI
## ------------------------
## 0.37 | [0.28, 0.45]
# Paired t-test of G41B (importance of decision-making): 2009 vs 2015, using
# the full per-wave data frames. Rows are paired by position.
G41B_t_test_09_15 <- t.test(
  x = data_2009$G41B,
  y = data_2015$G41B,
  paired = TRUE
)
G41B_t_test_09_15
##
## Paired t-test
##
## data: data_2009$G41B and data_2015$G41B
## t = 4.075, df = 514, p-value = 5.329e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.1246969 0.3568565
## sample estimates:
## mean difference
## 0.2407767
Long_format_2009_2015 %>% group_by(year) %>% count(G41B)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41B n
## <dbl> <dbl> <int>
## 1 2009 1 5
## 2 2009 2 4
## 3 2009 3 14
## 4 2009 4 45
## 5 2009 5 124
## 6 2009 6 166
## 7 2009 7 157
## 8 2015 1 16
## 9 2015 2 6
## 10 2015 3 15
## 11 2015 4 41
## 12 2015 5 161
## 13 2015 6 148
## 14 2015 7 128
mean_G41B_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(average_G41B = mean(G41B, na.rm = TRUE)) %>% ungroup()
mean_G41B_0915
## # A tibble: 2 × 2
## year average_G41B
## <dbl> <dbl>
## 1 2009 5.73
## 2 2015 5.49
sd_G41B_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(sd_G41B = sd(G41B, na.rm = TRUE)) %>% ungroup()
sd_G41B_0915
## # A tibble: 2 × 2
## year sd_G41B
## <dbl> <dbl>
## 1 2009 1.20
## 2 2015 1.36
ggplot(Long_format_2009_2015, aes(x = factor(year), y = G41B, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Importance of decision-making (G41B) in 2009 and 2015", x = "Year", y = "Importance of decision-making (G41B)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_G41B_09_15 <- cohens_d(data_2009$G41B, data_2015$G41B, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41B_09_15
## Cohen's d | 95% CI
## ------------------------
## 0.18 | [0.09, 0.27]
Long_format_2009_2015 %>% group_by(year) %>% count(G41C)
## # A tibble: 15 × 3
## # Groups: year [2]
## year G41C n
## <dbl> <dbl> <int>
## 1 2009 1 5
## 2 2009 2 6
## 3 2009 3 24
## 4 2009 4 50
## 5 2009 5 141
## 6 2009 6 156
## 7 2009 7 133
## 8 2015 0 388
## 9 2015 1 2
## 10 2015 2 2
## 11 2015 3 5
## 12 2015 4 14
## 13 2015 5 35
## 14 2015 6 38
## 15 2015 7 31
# Build the paired G41C (importance of challenging work) samples.
# Respondents whose 2015 answer is coded 0 are removed from both waves; the
# per-wave counts above show 0 only in 2015 (388 rows).
# NOTE(review): 0 presumably means "not asked/inapplicable" -- confirm.
remove_id_G41C <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41C == 0) %>%
  pull(TAS_ID)
data_2009_G41C <- Long_format_2009_2015 %>%
  filter(year_new == 0, !TAS_ID %in% remove_id_G41C)
data_2015_G41C <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41C != 0)
# G41C answer distribution per wave among the complete pairs.
Long_format_2009_2015 %>%
  filter(G41C != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(G41C)
## # A tibble: 13 × 3
## # Groups: year [2]
## year G41C n
## <dbl> <dbl> <int>
## 1 2009 2 2
## 2 2009 3 5
## 3 2009 4 9
## 4 2009 5 47
## 5 2009 6 35
## 6 2009 7 29
## 7 2015 1 2
## 8 2015 2 2
## 9 2015 3 5
## 10 2015 4 14
## 11 2015 5 35
## 12 2015 6 38
## 13 2015 7 31
Long_format_2009_2015 %>% filter(G41C != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2009 127
## 2 2015 127
# Paired t-test of G41C (importance of challenging work): 2009 vs 2015, using
# the G41C-specific subsets. Rows are paired by position.
G41C_t_test_09_15 <- t.test(
  x = data_2009_G41C$G41C,
  y = data_2015_G41C$G41C,
  paired = TRUE
)
G41C_t_test_09_15
##
## Paired t-test
##
## data: data_2009_G41C$G41C and data_2015_G41C$G41C
## t = 0.3639, df = 126, p-value = 0.7165
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.2096807 0.3041689
## sample estimates:
## mean difference
## 0.04724409
mean_G41C_0915 <- Long_format_2009_2015 %>% filter(G41C != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G41C = mean(G41C, na.rm = TRUE)) %>% ungroup()
mean_G41C_0915
## # A tibble: 2 × 2
## year average_G41C
## <dbl> <dbl>
## 1 2009 5.54
## 2 2015 5.49
sd_G41C_0915 <- Long_format_2009_2015 %>% filter(G41C != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_G41C = sd(G41C, na.rm = TRUE)) %>% ungroup()
sd_G41C_0915
## # A tibble: 2 × 2
## year sd_G41C
## <dbl> <dbl>
## 1 2009 1.13
## 2 2015 1.31
Long_format_2009_2015 %>% filter(G41C != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% ggplot(aes(x = factor(year), y = G41C, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Importance of challenging work (G41C) in 2009 and 2015", x = "Year", y = "Importance of challenging work (G41C)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_G41C_09_15 <- cohens_d(data_2009_G41C$G41C, data_2015_G41C$G41C, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41C_09_15
## Cohen's d | 95% CI
## -------------------------
## 0.03 | [-0.14, 0.21]
# Paired t-test of G41H (importance of healthcare benefits): 2009 vs 2015,
# using the full per-wave data frames. Rows are paired by position.
G41H_t_test_09_15 <- t.test(
  x = data_2009$G41H,
  y = data_2015$G41H,
  paired = TRUE
)
G41H_t_test_09_15
##
## Paired t-test
##
## data: data_2009$G41H and data_2015$G41H
## t = 4.9117, df = 514, p-value = 1.216e-06
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.1852472 0.4322285
## sample estimates:
## mean difference
## 0.3087379
Long_format_2009_2015 %>% group_by(year) %>% count(G41H)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41H n
## <dbl> <dbl> <int>
## 1 2009 1 2
## 2 2009 2 4
## 3 2009 3 4
## 4 2009 4 21
## 5 2009 5 53
## 6 2009 6 115
## 7 2009 7 316
## 8 2015 1 11
## 9 2015 2 13
## 10 2015 3 10
## 11 2015 4 23
## 12 2015 5 67
## 13 2015 6 117
## 14 2015 7 274
mean_G41H_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(average_G41H = mean(G41H, na.rm = TRUE)) %>% ungroup()
mean_G41H_0915
## # A tibble: 2 × 2
## year average_G41H
## <dbl> <dbl>
## 1 2009 6.36
## 2 2015 6.05
sd_G41H_0915 <- Long_format_2009_2015 %>% group_by(year) %>% summarize(sd_G41H = sd(G41H, na.rm = TRUE)) %>% ungroup()
sd_G41H_0915
## # A tibble: 2 × 2
## year sd_G41H
## <dbl> <dbl>
## 1 2009 1.02
## 2 2015 1.40
ggplot(Long_format_2009_2015, aes(x = factor(year), y = G41H, fill = factor(year))) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Boxplot of Importance of healthcare benefits (G41H) in 2009 and 2015", x = "Year", y = "Importance of healthcare benefits (G41H)") + scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) + theme_minimal()
effect_size_G41H_09_15 <- cohens_d(data_2009$G41H, data_2015$G41H, paired = TRUE)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41H_09_15
## Cohen's d | 95% CI
## ------------------------
## 0.22 | [0.13, 0.30]
Long_format_2009_2015 %>% group_by(year) %>% count(G41P)
## # A tibble: 16 × 3
## # Groups: year [2]
## year G41P n
## <dbl> <dbl> <int>
## 1 2009 1 24
## 2 2009 2 24
## 3 2009 3 46
## 4 2009 4 101
## 5 2009 5 112
## 6 2009 6 102
## 7 2009 7 104
## 8 2009 8 2
## 9 2015 1 25
## 10 2015 2 34
## 11 2015 3 42
## 12 2015 4 95
## 13 2015 5 119
## 14 2015 6 92
## 15 2015 7 107
## 16 2015 8 1
# Build the paired G41P (importance of job central to identity) samples.
# Answers coded 8 are excluded, and a respondent coded 8 in either wave is
# removed from the other wave as well.
# NOTE(review): 8 is presumably a "don't know" code -- confirm in codebook.
remove_id_G41P_09 <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41P == 8) %>%
  pull(TAS_ID)
remove_id_G41P_15 <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41P == 8) %>%
  pull(TAS_ID)
data_2009_G41P <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41P != 8, !TAS_ID %in% remove_id_G41P_15)
data_2015_G41P <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41P != 8, !TAS_ID %in% remove_id_G41P_09)
# G41P answer distribution per wave among the complete pairs.
Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(G41P)
## # A tibble: 14 × 3
## # Groups: year [2]
## year G41P n
## <dbl> <dbl> <int>
## 1 2009 1 24
## 2 2009 2 24
## 3 2009 3 46
## 4 2009 4 101
## 5 2009 5 112
## 6 2009 6 102
## 7 2009 7 103
## 8 2015 1 25
## 9 2015 2 33
## 10 2015 3 42
## 11 2015 4 95
## 12 2015 5 118
## 13 2015 6 92
## 14 2015 7 107
Long_format_2009_2015 %>% filter(G41P != 8) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)
## # A tibble: 2 × 2
## year n
## <dbl> <int>
## 1 2009 512
## 2 2015 512
# Paired t-test of G41P (importance of job central to identity): 2009 vs
# 2015, using the G41P-specific subsets. Rows are paired by position.
G41P_t_test_09_15 <- t.test(
  x = data_2009_G41P$G41P,
  y = data_2015_G41P$G41P,
  paired = TRUE
)
G41P_t_test_09_15
##
## Paired t-test
##
## data: data_2009_G41P$G41P and data_2015_G41P$G41P
## t = 0.44786, df = 511, p-value = 0.6544
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.1256782 0.1998970
## sample estimates:
## mean difference
## 0.03710938
mean_G41P_0915 <- Long_format_2009_2015 %>% filter(G41P != 8) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G41P = mean(G41P, na.rm = TRUE)) %>% ungroup()
mean_G41P_0915
## # A tibble: 2 × 2
## year average_G41P
## <dbl> <dbl>
## 1 2009 4.90
## 2 2015 4.86
sd_G41P_0915 <- Long_format_2009_2015 %>% filter(G41P != 8) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_G41P = sd(G41P, na.rm = TRUE)) %>% ungroup()
sd_G41P_0915
## # A tibble: 2 × 2
## year sd_G41P
## <dbl> <dbl>
## 1 2009 1.65
## 2 2015 1.70
# Boxplot of G41P by year for respondents with a non-8 answer in two distinct
# years; the dashed crossbar marks the yearly mean.
Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = G41P, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0, where
    # `size` on line-based geoms is deprecated).
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job central to identity (G41P) in 2009 and 2015",
    x = "Year",
    y = "Importance of job central to identity (G41P)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for G41P between the two waves. The call is namespaced
# because rstatix also exports a cohens_d(); effectsize is attached later, so
# its version is the one the unqualified name resolves to.
effect_size_G41P_09_15 <- effectsize::cohens_d(
  x = data_2009_G41P$G41P,
  y = data_2015_G41P$G41P,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_G41P_09_15
## Cohen's d | 95% CI
## -------------------------
## 0.02 | [-0.07, 0.11]
# Paired t-test of H1 between the two waves; observations are paired by row
# position in data_2009 and data_2015 (both created earlier in the script).
H1_t_test_09_15 <- t.test(
  x = data_2009$H1,
  y = data_2015$H1,
  paired = TRUE
)
H1_t_test_09_15
##
## Paired t-test
##
## data: data_2009$H1 and data_2015$H1
## t = -3.1495, df = 514, p-value = 0.001731
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.22386069 -0.05186746
## sample estimates:
## mean difference
## -0.1378641
# Frequency table of H1 responses within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(H1)
## # A tibble: 10 × 3
## # Groups: year [2]
## year H1 n
## <dbl> <dbl> <int>
## 1 2009 1 128
## 2 2009 2 232
## 3 2009 3 121
## 4 2009 4 28
## 5 2009 5 6
## 6 2015 1 92
## 7 2015 2 250
## 8 2015 3 123
## 9 2015 4 43
## 10 2015 5 7
# Mean H1 per year (missing values ignored).
mean_H1_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_H1 = mean(H1, na.rm = TRUE)) %>%
  ungroup()
mean_H1_0915
## # A tibble: 2 × 2
## year average_H1
## <dbl> <dbl>
## 1 2009 2.13
## 2 2015 2.27
# Standard deviation of H1 per year (missing values ignored).
sd_H1_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_H1 = sd(H1, na.rm = TRUE)) %>%
  ungroup()
sd_H1_0915
## # A tibble: 2 × 2
## year sd_H1
## <dbl> <dbl>
## 1 2009 0.889
## 2 2015 0.896
# Boxplot of H1 by year; the dashed crossbar marks the yearly mean.
ggplot(
  Long_format_2009_2015,
  aes(x = factor(year), y = H1, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0, where
    # `size` on line-based geoms is deprecated).
    linewidth = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of General Health (H1) in 2009 and 2015",
    x = "Year",
    y = "General Health (H1)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()
# Paired Cohen's d for H1 between the two waves. The call is namespaced
# because rstatix also exports a cohens_d(); effectsize is attached later, so
# its version is the one the unqualified name resolves to.
effect_size_H1_09_15 <- effectsize::cohens_d(
  x = data_2009$H1,
  y = data_2015$H1,
  paired = TRUE
)
## For paired samples, 'repeated_measures_d()' provides more options.
effect_size_H1_09_15
## Cohen's d | 95% CI
## --------------------------
## -0.14 | [-0.23, -0.05]