TAS Descriptive statistics

We will be going through the following steps:

Step 1: Loading Packages

library(tidyverse)
library(readxl)
library(ggplot2)
library (reshape2)
library(writexl)
library (lmerTest)
library(lme4)
library(dplyr)
library(ggpubr)
library(rstatix)
library(effectsize)

Step 2: Import the data

TAS_data_long_format_age <- read_excel("TAS_data_long_format_age.xlsx")

Step 3: Preview the data

view(TAS_data_long_format_age)

head(TAS_data_long_format_age)

## # A tibble: 6 × 42
##     TAS TAS05 TAS09 TAS15 `1968 Interview Number` `Person Number` Gender
##   <dbl> <dbl> <dbl> <dbl>                   <dbl>           <dbl>  <dbl>
## 1     2     1     1    NA                       4             180      2
## 2     2     1     1    NA                       5              32      2
## 3     2     1     1    NA                       6              34      1
## 4     2     1     1    NA                      14              30      1
## 5     1     1    NA    NA                      18              38      2
## 6     2     1     1    NA                      47              34      2
## # ℹ 35 more variables: `Individual is sample` <dbl>, `Year ID Number` <dbl>,
## #   `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## #   `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## #   C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## #   E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## #   G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## #   G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>, …

2005 & 2009

Step 5: t-test (2005 & 2009)

Filter the data (2005 & 2009)

# Build the 2005/2009 analysis sample from the long-format data.
Long_format_2005_2009 <- TAS_data_long_format_age %>%
  # Keep the waves before 2010 and rows flagged 1 on both TAS05 and TAS09.
  filter(year < 2010, TAS05 == 1, TAS09 == 1) %>%
  # Merge the two identifier columns into a single respondent key.
  unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>%
  # Numeric wave index: 2005 -> -1, 2009 -> 0, 2015 -> 1.
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  group_by(TAS_ID) %>%
  # NOTE(review): 2027 and 2023 look like placeholder values rather than real
  # ages -- confirm. They are replaced with the respondent's age from the
  # other wave, shifted by the 4-year gap between waves.
  mutate(
    Age_18_graduate = case_when(
      Age_18_graduate == 2027 ~ Age_18_graduate[year == 2005] + 4,
      Age_18_graduate == 2023 ~ Age_18_graduate[year == 2009] - 4,
      TRUE ~ Age_18_graduate
    )
  ) %>%
  ungroup() %>%
  # Drop rows whose age is still 100 or more after the replacement above.
  filter(Age_18_graduate < 100) %>%
  group_by(TAS_ID) %>%
  # Age change between the two waves, computed per respondent.
  mutate(
    age_difference =
      Age_18_graduate[year == 2009] - Age_18_graduate[year == 2005]
  ) %>%
  # Keep respondents whose age difference is strictly between 2 and 6.
  filter(age_difference < 6, age_difference > 2) %>%
  ungroup()

view(Long_format_2005_2009)

knitr::kable(head(Long_format_2005_2009[, 1:43]))

TAS	TAS05	TAS09	TAS15	TAS_ID	Gender	Individual is sample	Year ID Number	Sequence Number	Relationship to Head	Release Number	B5A	B5D	B6C	C2D	C2E	C2F	E1_1st_mention	E1_2nd_mention	E3	G1	G2_month	G2_year	G10	G11	G30A	G41A	G41B	G41C	G41H	G41P	H1	L7_1st_mention	Age_17_graduate	Age_18_graduate	year	year_new	age_difference
2	1	1	NA	5_32	2	2	624	3	30	5	5	5	5	7	7	7	1	7	0	1	5	2002	1	1	7	7	6	6	7	5	2	1	20	21	2005	-1	4
2	1	1	NA	6_34	1	2	1202	51	30	5	2	2	6	1	1	1	7	0	5	1	5	2002	1	1	0	7	5	7	5	3	1	1	20	21	2005	-1	4
2	1	1	NA	14_30	1	2	736	51	30	5	4	4	4	2	1	1	2	0	0	1	6	2003	1	5	6	5	6	6	5	5	2	1	19	20	2005	-1	4
2	1	1	NA	47_34	2	2	2516	3	30	5	4	5	6	4	5	2	1	0	0	1	5	2005	5	0	6	3	6	4	7	4	1	1	17	18	2005	-1	4
2	1	1	NA	53_35	2	2	1392	3	33	5	4	5	5	3	1	1	1	0	0	1	6	2002	1	1	7	6	7	7	7	5	1	1	20	21	2005	-1	4
2	1	1	NA	53_36	2	2	1616	3	30	5	4	5	7	4	1	1	6	0	1	1	6	2005	1	5	7	7	7	5	7	6	2	1	17	18	2005	-1	4

count(Long_format_2005_2009, year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   542
## 2  2009   542

count(Long_format_2005_2009, Age_18_graduate)

## # A tibble: 12 × 2
##    Age_18_graduate     n
##              <dbl> <int>
##  1              14     1
##  2              17     7
##  3              18   166
##  4              19   150
##  5              20   141
##  6              21    83
##  7              22   167
##  8              23   150
##  9              24   140
## 10              25    76
## 11              26     2
## 12              27     1

Age count - 2005

Long_format_2005_2009 %>% filter(year == 2005) %>% count(Age_18_graduate)

## # A tibble: 8 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              14     1
## 2              17     7
## 3              18   165
## 4              19   150
## 5              20   141
## 6              21    76
## 7              22     1
## 8              23     1

Age count - 2009

Long_format_2005_2009 %>% filter(year == 2009) %>% count(Age_18_graduate)

## # A tibble: 8 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              18     1
## 2              21     7
## 3              22   166
## 4              23   149
## 5              24   140
## 6              25    76
## 7              26     2
## 8              27     1

SPECIAL: G30A, G41P, H1 (2005 & 2009)

# Split the sample into one data frame per wave (year_new -1 = 2005, 0 = 2009).
# The paired t-tests and effect sizes below match observations by row
# position, so both frames are sorted by TAS_ID to guarantee that row i is the
# same respondent in each wave.
data_2005 <- Long_format_2005_2009 %>%
  filter(year_new == -1) %>%
  arrange(TAS_ID)
data_2009 <- Long_format_2005_2009 %>%
  filter(year_new == 0) %>%
  arrange(TAS_ID)
# Fail fast if the two waves do not line up respondent by respondent.
stopifnot(identical(data_2005$TAS_ID, data_2009$TAS_ID))

B5A: Responsibility for self

B5A_t_test_05_09 <- t.test(data_2005$B5A, data_2009$B5A, paired = TRUE)
B5A_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$B5A and data_2009$B5A
## t = -14.895, df = 541, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.9376670 -0.7191595
## sample estimates:
## mean difference 
##      -0.8284133

Long_format_2005_2009 %>% group_by(year) %>% count(B5A)

## # A tibble: 10 × 3
## # Groups:   year [2]
##     year   B5A     n
##    <dbl> <dbl> <int>
##  1  2005     1    25
##  2  2005     2   105
##  3  2005     3   119
##  4  2005     4   167
##  5  2005     5   126
##  6  2009     1    11
##  7  2009     2    29
##  8  2009     3    47
##  9  2009     4   146
## 10  2009     5   309

mean_B5A_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_B5A = mean(B5A, na.rm = TRUE)) %>% ungroup()
mean_B5A_0509

## # A tibble: 2 × 2
##    year average_B5A
##   <dbl>       <dbl>
## 1  2005        3.49
## 2  2009        4.32

sd_B5A_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_B5A = sd(B5A, na.rm = TRUE)) %>% ungroup()
sd_B5A_0509

## # A tibble: 2 × 2
##    year sd_B5A
##   <dbl>  <dbl>
## 1  2005  1.18 
## 2  2009  0.978

# Boxplot of B5A by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
Long_format_2005_2009 %>%
  ggplot(aes(x = factor(year), y = B5A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Responsibility for Self (B5A) in 2005 and 2009",
    x = "Year",
    y = "Responsibility for Self (B5A)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Paired effect size for B5A. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_B5A_05_09 <- effectsize::cohens_d(
  data_2005$B5A, data_2009$B5A,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_B5A_05_09

## Cohen's d |         95% CI
## --------------------------
## -0.64     | [-0.73, -0.55]

B5D: Managing own money

B5D_t_test_05_09 <- t.test(data_2005$B5D, data_2009$B5D, paired = TRUE)
B5D_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$B5D and data_2009$B5D
## t = -8.0405, df = 541, p-value = 5.668e-15
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.4499712 -0.2732761
## sample estimates:
## mean difference 
##      -0.3616236

Long_format_2005_2009 %>% group_by(year) %>% count(B5D)

## # A tibble: 10 × 3
## # Groups:   year [2]
##     year   B5D     n
##    <dbl> <dbl> <int>
##  1  2005     1     9
##  2  2005     2    17
##  3  2005     3    59
##  4  2005     4   140
##  5  2005     5   317
##  6  2009     1     3
##  7  2009     2     9
##  8  2009     3    23
##  9  2009     4    64
## 10  2009     5   443

mean_B5D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_B5D = mean(B5D, na.rm = TRUE)) %>% ungroup()
mean_B5D_0509

## # A tibble: 2 × 2
##    year average_B5D
##   <dbl>       <dbl>
## 1  2005        4.36
## 2  2009        4.73

sd_B5D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_B5D = sd(B5D, na.rm = TRUE)) %>% ungroup()
sd_B5D_0509

## # A tibble: 2 × 2
##    year sd_B5D
##   <dbl>  <dbl>
## 1  2005  0.915
## 2  2009  0.672

# Boxplot of B5D by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
Long_format_2005_2009 %>%
  ggplot(aes(x = factor(year), y = B5D, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Managing own money (B5D) in 2005 and 2009",
    x = "Year",
    y = "Managing own money (B5D)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for B5D. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_B5D_05_09 <- effectsize::cohens_d(
  data_2005$B5D, data_2009$B5D,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_B5D_05_09

## Cohen's d |         95% CI
## --------------------------
## -0.35     | [-0.43, -0.26]

B6C: Money management skills

B6C_t_test_05_09 <- t.test(data_2005$B6C, data_2009$B6C, paired = TRUE)
B6C_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$B6C and data_2009$B6C
## t = -1.4667, df = 541, p-value = 0.143
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.20285228  0.02942055
## sample estimates:
## mean difference 
##     -0.08671587

Long_format_2005_2009 %>% group_by(year) %>% count(B6C)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   B6C     n
##    <dbl> <dbl> <int>
##  1  2005     1     9
##  2  2005     2    21
##  3  2005     3    30
##  4  2005     4    71
##  5  2005     5   163
##  6  2005     6   115
##  7  2005     7   133
##  8  2009     1     4
##  9  2009     2    10
## 10  2009     3    22
## 11  2009     4    88
## 12  2009     5   163
## 13  2009     6   134
## 14  2009     7   121

mean_B6C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_B6C = mean(B6C, na.rm = TRUE)) %>% ungroup()
mean_B6C_0509

## # A tibble: 2 × 2
##    year average_B6C
##   <dbl>       <dbl>
## 1  2005        5.28
## 2  2009        5.37

sd_B6C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_B6C = sd(B6C, na.rm = TRUE)) %>% ungroup()
sd_B6C_0509

## # A tibble: 2 × 2
##    year sd_B6C
##   <dbl>  <dbl>
## 1  2005   1.44
## 2  2009   1.27

# Boxplot of B6C by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
Long_format_2005_2009 %>%
  ggplot(aes(x = factor(year), y = B6C, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Money management skills (B6C) in 2005 and 2009",
    x = "Year",
    y = "Money management skills (B6C)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for B6C. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_B6C_05_09 <- effectsize::cohens_d(
  data_2005$B6C, data_2009$B6C,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_B6C_05_09

## Cohen's d |        95% CI
## -------------------------
## -0.06     | [-0.15, 0.02]

C2D: Worry about expenses

C2D_t_test_05_09 <- t.test(data_2005$C2D, data_2009$C2D, paired = TRUE)
C2D_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$C2D and data_2009$C2D
## t = -3.0212, df = 541, p-value = 0.002636
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.46582598 -0.09874967
## sample estimates:
## mean difference 
##      -0.2822878

Long_format_2005_2009 %>% group_by(year) %>% count(C2D)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2D     n
##    <dbl> <dbl> <int>
##  1  2005     1    82
##  2  2005     2    87
##  3  2005     3    96
##  4  2005     4    98
##  5  2005     5    81
##  6  2005     6    48
##  7  2005     7    50
##  8  2009     1    60
##  9  2009     2    85
## 10  2009     3    90
## 11  2009     4    97
## 12  2009     5    82
## 13  2009     6    62
## 14  2009     7    66

mean_C2D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_C2D = mean(C2D, na.rm = TRUE)) %>% ungroup()
mean_C2D_0509

## # A tibble: 2 × 2
##    year average_C2D
##   <dbl>       <dbl>
## 1  2005        3.65
## 2  2009        3.93

sd_C2D_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_C2D = sd(C2D, na.rm = TRUE)) %>% ungroup()
sd_C2D_0509

## # A tibble: 2 × 2
##    year sd_C2D
##   <dbl>  <dbl>
## 1  2005   1.84
## 2  2009   1.87

# Boxplot of C2D by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = C2D, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Worry about expenses (C2D) in 2005 and 2009",
    x = "Year",
    y = "Worry about expenses (C2D)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for C2D. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_C2D_05_09 <- effectsize::cohens_d(
  data_2005$C2D, data_2009$C2D,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_C2D_05_09

## Cohen's d |         95% CI
## --------------------------
## -0.13     | [-0.21, -0.05]

C2E: Worry about future job

C2E_t_test_05_09 <- t.test(data_2005$C2E, data_2009$C2E, paired = TRUE)
C2E_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$C2E and data_2009$C2E
## t = -2.5726, df = 541, p-value = 0.01036
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.42299611 -0.05670869
## sample estimates:
## mean difference 
##      -0.2398524

Long_format_2005_2009 %>% group_by(year) %>% count(C2E)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2E     n
##    <dbl> <dbl> <int>
##  1  2005     1   103
##  2  2005     2   101
##  3  2005     3    89
##  4  2005     4    65
##  5  2005     5    86
##  6  2005     6    59
##  7  2005     7    39
##  8  2009     1    75
##  9  2009     2   100
## 10  2009     3    90
## 11  2009     4    90
## 12  2009     5    64
## 13  2009     6    67
## 14  2009     7    56

mean_C2E_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_C2E = mean(C2E, na.rm = TRUE)) %>% ungroup()
mean_C2E_0509

## # A tibble: 2 × 2
##    year average_C2E
##   <dbl>       <dbl>
## 1  2005        3.49
## 2  2009        3.73

sd_C2E_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_C2E = sd(C2E, na.rm = TRUE)) %>% ungroup()
sd_C2E_0509

## # A tibble: 2 × 2
##    year sd_C2E
##   <dbl>  <dbl>
## 1  2005   1.90
## 2  2009   1.90

# Boxplot of C2E by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = C2E, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Worry about future job (C2E) in 2005 and 2009",
    x = "Year",
    y = "Worry about future job (C2E)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for C2E. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_C2E_05_09 <- effectsize::cohens_d(
  data_2005$C2E, data_2009$C2E,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_C2E_05_09

## Cohen's d |         95% CI
## --------------------------
## -0.11     | [-0.19, -0.03]

C2F: Discouraged about future

C2F_t_test_05_09 <- t.test(data_2005$C2F, data_2009$C2F, paired = TRUE)
C2F_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$C2F and data_2009$C2F
## t = -2.7143, df = 541, p-value = 0.006854
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.38799401 -0.06219049
## sample estimates:
## mean difference 
##      -0.2250923

Long_format_2005_2009 %>% group_by(year) %>% count(C2F)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2F     n
##    <dbl> <dbl> <int>
##  1  2005     1   125
##  2  2005     2   132
##  3  2005     3    93
##  4  2005     4    81
##  5  2005     5    56
##  6  2005     6    32
##  7  2005     7    23
##  8  2009     1    94
##  9  2009     2   128
## 10  2009     3   109
## 11  2009     4    71
## 12  2009     5    78
## 13  2009     6    38
## 14  2009     7    24

mean_C2F_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_C2F = mean(C2F, na.rm = TRUE)) %>% ungroup()
mean_C2F_0509

## # A tibble: 2 × 2
##    year average_C2F
##   <dbl>       <dbl>
## 1  2005        3.00
## 2  2009        3.22

sd_C2F_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_C2F = sd(C2F, na.rm = TRUE)) %>% ungroup()
sd_C2F_0509

## # A tibble: 2 × 2
##    year sd_C2F
##   <dbl>  <dbl>
## 1  2005   1.72
## 2  2009   1.71

# Boxplot of C2F by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = C2F, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Discouraged about future (C2F) in 2005 and 2009",
    x = "Year",
    y = "Discouraged about future (C2F)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for C2F. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_C2F_05_09 <- effectsize::cohens_d(
  data_2005$C2F, data_2009$C2F,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_C2F_05_09

## Cohen's d |         95% CI
## --------------------------
## -0.12     | [-0.20, -0.03]

G30A: Likelihood of well-paying job (SPECIAL)

Long_format_2005_2009 %>% group_by(year) %>% count(G30A)

## # A tibble: 15 × 3
## # Groups:   year [2]
##     year  G30A     n
##    <dbl> <dbl> <int>
##  1  2005     0    56
##  2  2005     1     1
##  3  2005     2     1
##  4  2005     3     5
##  5  2005     4    26
##  6  2005     5    91
##  7  2005     6   180
##  8  2005     7   182
##  9  2009     1     3
## 10  2009     2     4
## 11  2009     3     5
## 12  2009     4    32
## 13  2009     5   136
## 14  2009     6   168
## 15  2009     7   194

Long_format_2005_2009 %>% count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   542
## 2  2009   542

# Build the paired G30A sample. A respondent is dropped from BOTH waves when
# their G30A is 0 in either wave, so the two frames always contain the same
# respondents (previously only 2005 zeros were removed, which would leave the
# vectors unequal or misaligned if a 0 appeared in 2009).
remove_id <- Long_format_2005_2009 %>%
  filter(G30A == 0) %>%
  pull(TAS_ID)
# Sort by TAS_ID so row i is the same respondent in each frame; the paired
# test and effect size match observations by position.
data_2005_G30A <- Long_format_2005_2009 %>%
  filter(year_new == -1, !(TAS_ID %in% remove_id)) %>%
  arrange(TAS_ID)
data_2009_G30A <- Long_format_2005_2009 %>%
  filter(year_new == 0, !(TAS_ID %in% remove_id)) %>%
  arrange(TAS_ID)
# Fail fast if the two waves do not line up respondent by respondent.
stopifnot(identical(data_2005_G30A$TAS_ID, data_2009_G30A$TAS_ID))

G30A_t_test_05_09 <- t.test(data_2005_G30A$G30A, data_2009_G30A$G30A, paired = TRUE)
G30A_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005_G30A$G30A and data_2009_G30A$G30A
## t = 2.5203, df = 485, p-value = 0.01205
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.03083453 0.24900086
## sample estimates:
## mean difference 
##       0.1399177

Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% count(G30A) %>% ungroup()

## # A tibble: 14 × 3
##     year  G30A     n
##    <dbl> <dbl> <int>
##  1  2005     1     1
##  2  2005     2     1
##  3  2005     3     5
##  4  2005     4    26
##  5  2005     5    91
##  6  2005     6   180
##  7  2005     7   182
##  8  2009     1     3
##  9  2009     2     4
## 10  2009     3     5
## 11  2009     4    26
## 12  2009     5   127
## 13  2009     6   149
## 14  2009     7   172

Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   486
## 2  2009   486

mean_G30A_0509 <- Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G30A = mean(G30A, na.rm = TRUE)) %>% ungroup()
mean_G30A_0509

## # A tibble: 2 × 2
##    year average_G30A
##   <dbl>        <dbl>
## 1  2005         6.03
## 2  2009         5.89

sd_G30A_0509 <- Long_format_2005_2009 %>% filter(G30A != 0) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(sd_G30A = sd(G30A, na.rm = TRUE)) %>% ungroup()
sd_G30A_0509

## # A tibble: 2 × 2
##    year sd_G30A
##   <dbl>   <dbl>
## 1  2005   0.977
## 2  2009   1.09

# Boxplot of G30A by wave, restricted to respondents with a non-zero G30A in
# both waves; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
Long_format_2005_2009 %>%
  filter(G30A != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = G30A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Likelihood of well-paying job (G30A) in 2005 and 2009",
    x = "Year",
    y = "Likelihood of well-paying job (G30A)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for G30A. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_G30A_05_09 <- effectsize::cohens_d(
  data_2005_G30A$G30A, data_2009_G30A$G30A,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G30A_05_09

## Cohen's d |       95% CI
## ------------------------
## 0.11      | [0.03, 0.20]

G41A: Importance of job status

G41A_t_test_05_09 <- t.test(data_2005$G41A, data_2009$G41A, paired = TRUE)
G41A_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$G41A and data_2009$G41A
## t = 8.1936, df = 541, p-value = 1.841e-15
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.4614852 0.7525369
## sample estimates:
## mean difference 
##       0.6070111

Long_format_2005_2009 %>% group_by(year) %>% count(G41A)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41A     n
##    <dbl> <dbl> <int>
##  1  2005     1    22
##  2  2005     2    24
##  3  2005     3    37
##  4  2005     4    61
##  5  2005     5   127
##  6  2005     6   117
##  7  2005     7   154
##  8  2009     1    48
##  9  2009     2    42
## 10  2009     3    42
## 11  2009     4    84
## 12  2009     5   140
## 13  2009     6    85
## 14  2009     7   101

mean_G41A_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41A = mean(G41A, na.rm = TRUE)) %>% ungroup()
mean_G41A_0509

## # A tibble: 2 × 2
##    year average_G41A
##   <dbl>        <dbl>
## 1  2005         5.24
## 2  2009         4.63

sd_G41A_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41A = sd(G41A, na.rm = TRUE)) %>% ungroup()
sd_G41A_0509

## # A tibble: 2 × 2
##    year sd_G41A
##   <dbl>   <dbl>
## 1  2005    1.65
## 2  2009    1.83

# Boxplot of G41A by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41A, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job status (G41A) in 2005 and 2009",
    x = "Year",
    y = "Importance of job status (G41A)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for G41A. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_G41A_05_09 <- effectsize::cohens_d(
  data_2005$G41A, data_2009$G41A,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41A_05_09

## Cohen's d |       95% CI
## ------------------------
## 0.35      | [0.27, 0.44]

G41B: Importance of decision-making

G41B_t_test_05_09 <- t.test(data_2005$G41B, data_2009$G41B, paired = TRUE)
G41B_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$G41B and data_2009$G41B
## t = 1.8469, df = 541, p-value = 0.0653
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.006920177  0.224632354
## sample estimates:
## mean difference 
##       0.1088561

Long_format_2005_2009 %>% group_by(year) %>% count(G41B)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41B     n
##    <dbl> <dbl> <int>
##  1  2005     1     2
##  2  2005     2     5
##  3  2005     3    14
##  4  2005     4    41
##  5  2005     5   135
##  6  2005     6   198
##  7  2005     7   147
##  8  2009     1     7
##  9  2009     2     5
## 10  2009     3    14
## 11  2009     4    48
## 12  2009     5   153
## 13  2009     6   170
## 14  2009     7   145

mean_G41B_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41B = mean(G41B, na.rm = TRUE)) %>% ungroup()
mean_G41B_0509

## # A tibble: 2 × 2
##    year average_G41B
##   <dbl>        <dbl>
## 1  2005         5.74
## 2  2009         5.63

sd_G41B_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41B = sd(G41B, na.rm = TRUE)) %>% ungroup()
sd_G41B_0509

## # A tibble: 2 × 2
##    year sd_G41B
##   <dbl>   <dbl>
## 1  2005    1.11
## 2  2009    1.21

# Boxplot of G41B by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41B, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of decision-making (G41B) in 2005 and 2009",
    x = "Year",
    y = "Importance of decision-making (G41B)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for G41B. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_G41B_05_09 <- effectsize::cohens_d(
  data_2005$G41B, data_2009$G41B,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41B_05_09

## Cohen's d |        95% CI
## -------------------------
## 0.08      | [-0.01, 0.16]

G41C: Importance of challenging work

G41C_t_test_05_09 <- t.test(data_2005$G41C, data_2009$G41C, paired = TRUE)
G41C_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$G41C and data_2009$G41C
## t = -1.2232, df = 541, p-value = 0.2218
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.18270369  0.04248229
## sample estimates:
## mean difference 
##      -0.0701107

Long_format_2005_2009 %>% group_by(year) %>% count(G41C)

## # A tibble: 13 × 3
## # Groups:   year [2]
##     year  G41C     n
##    <dbl> <dbl> <int>
##  1  2005     2     6
##  2  2005     3    21
##  3  2005     4    67
##  4  2005     5   160
##  5  2005     6   168
##  6  2005     7   120
##  7  2009     1     4
##  8  2009     2     8
##  9  2009     3    13
## 10  2009     4    52
## 11  2009     5   161
## 12  2009     6   171
## 13  2009     7   133

mean_G41C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41C = mean(G41C, na.rm = TRUE)) %>% ungroup()
mean_G41C_0509

## # A tibble: 2 × 2
##    year average_G41C
##   <dbl>        <dbl>
## 1  2005         5.52
## 2  2009         5.59

sd_G41C_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41C = sd(G41C, na.rm = TRUE)) %>% ungroup()
sd_G41C_0509

## # A tibble: 2 × 2
##    year sd_G41C
##   <dbl>   <dbl>
## 1  2005    1.14
## 2  2009    1.18

# Boxplot of G41C by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41C, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of challenging work (G41C) in 2005 and 2009",
    x = "Year",
    y = "Importance of challenging work (G41C)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for G41C. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_G41C_05_09 <- effectsize::cohens_d(
  data_2005$G41C, data_2009$G41C,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41C_05_09

## Cohen's d |        95% CI
## -------------------------
## -0.05     | [-0.14, 0.03]

G41H: Importance of healthcare benefits

G41H_t_test_05_09 <- t.test(data_2005$G41H, data_2009$G41H, paired = TRUE)
G41H_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005$G41H and data_2009$G41H
## t = 1.4269, df = 541, p-value = 0.1542
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.0291852  0.1841668
## sample estimates:
## mean difference 
##      0.07749077

Long_format_2005_2009 %>% group_by(year) %>% count(G41H)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41H     n
##    <dbl> <dbl> <int>
##  1  2005     1     1
##  2  2005     2     3
##  3  2005     3     4
##  4  2005     4    21
##  5  2005     5    56
##  6  2005     6   142
##  7  2005     7   315
##  8  2009     1     7
##  9  2009     2     5
## 10  2009     3     6
## 11  2009     4    22
## 12  2009     5    55
## 13  2009     6   129
## 14  2009     7   318

mean_G41H_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(average_G41H = mean(G41H, na.rm = TRUE)) %>% ungroup()
mean_G41H_0509

## # A tibble: 2 × 2
##    year average_G41H
##   <dbl>        <dbl>
## 1  2005         6.35
## 2  2009         6.27

sd_G41H_0509 <- Long_format_2005_2009 %>% group_by(year) %>% summarize(sd_G41H = sd(G41H, na.rm = TRUE)) %>% ungroup()
sd_G41H_0509

## # A tibble: 2 × 2
##    year sd_G41H
##   <dbl>   <dbl>
## 1  2005   0.960
## 2  2009   1.16

# Boxplot of G41H by wave; the dashed crossbar marks each year's mean.
# `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
ggplot(
  Long_format_2005_2009,
  aes(x = factor(year), y = G41H, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.75,
    color = "black", linewidth = 0.2, linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of healthcare benefits (G41H) in 2005 and 2009",
    x = "Year",
    y = "Importance of healthcare benefits (G41H)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired effect size for G41H. The namespace is explicit because rstatix also
# exports cohens_d(); a bare call depends on the order of library() calls.
effect_size_G41H_05_09 <- effectsize::cohens_d(
  data_2005$G41H, data_2009$G41H,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41H_05_09

## Cohen's d |        95% CI
## -------------------------
## 0.06      | [-0.02, 0.15]

G41P: Importance of job central to identity (SPECIAL)

Long_format_2005_2009 %>% group_by(year) %>% count(G41P)

## # A tibble: 15 × 3
## # Groups:   year [2]
##     year  G41P     n
##    <dbl> <dbl> <int>
##  1  2005     1    18
##  2  2005     2    18
##  3  2005     3    48
##  4  2005     4    94
##  5  2005     5   138
##  6  2005     6   123
##  7  2005     7   102
##  8  2005     9     1
##  9  2009     1    44
## 10  2009     2    56
## 11  2009     3    46
## 12  2009     4   117
## 13  2009     5   129
## 14  2009     6    72
## 15  2009     7    78

# Build the paired G41P sample. A respondent is dropped from BOTH waves when
# their G41P is 9 in either wave, so the two frames always contain the same
# respondents (previously only 2005 values of 9 were removed, which would
# leave the vectors unequal or misaligned if a 9 appeared in 2009).
remove_id_G41P <- Long_format_2005_2009 %>%
  filter(G41P == 9) %>%
  pull(TAS_ID)
# Sort by TAS_ID so row i is the same respondent in each frame; the paired
# test and effect size match observations by position.
data_2005_G41P <- Long_format_2005_2009 %>%
  filter(year_new == -1, !(TAS_ID %in% remove_id_G41P)) %>%
  arrange(TAS_ID)
data_2009_G41P <- Long_format_2005_2009 %>%
  filter(year_new == 0, !(TAS_ID %in% remove_id_G41P)) %>%
  arrange(TAS_ID)
# Fail fast if the two waves do not line up respondent by respondent.
stopifnot(identical(data_2005_G41P$TAS_ID, data_2009_G41P$TAS_ID))

G41P_t_test_05_09 <- t.test(data_2005_G41P$G41P, data_2009_G41P$G41P, paired = TRUE)
G41P_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005_G41P$G41P and data_2009_G41P$G41P
## t = 7.5301, df = 540, p-value = 2.144e-13
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.4576866 0.7807607
## sample estimates:
## mean difference 
##       0.6192237

Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% count(G41P)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41P     n
##    <dbl> <dbl> <int>
##  1  2005     1    18
##  2  2005     2    18
##  3  2005     3    48
##  4  2005     4    94
##  5  2005     5   138
##  6  2005     6   123
##  7  2005     7   102
##  8  2009     1    44
##  9  2009     2    56
## 10  2009     3    46
## 11  2009     4   116
## 12  2009     5   129
## 13  2009     6    72
## 14  2009     7    78

Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   541
## 2  2009   541

mean_G41P_0509 <- Long_format_2005_2009 %>% filter(G41P != 9) %>% group_by(TAS_ID) %>% filter(n_distinct(year) == 2) %>% ungroup() %>% group_by(year) %>% summarize(average_G41P = mean(G41P, na.rm = TRUE)) %>% ungroup()
mean_G41P_0509

## # A tibble: 2 × 2
##    year average_G41P
##   <dbl>        <dbl>
## 1  2005         5.02
## 2  2009         4.40

# Yearly standard deviation of G41P over the same sample.
sd_G41P_0509 <- Long_format_2005_2009 %>%
  filter(G41P != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_G41P = sd(G41P, na.rm = TRUE)) %>%
  ungroup()
sd_G41P_0509

## # A tibble: 2 × 2
##    year sd_G41P
##   <dbl>   <dbl>
## 1  2005    1.53
## 2  2009    1.77

# Boxplot of G41P by year; the dashed crossbar marks each year's mean.
Long_format_2005_2009 %>%
  filter(G41P != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = G41P, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job central to identity (G41P) in 2005 and 2009",
    x = "Year",
    y = "Importance of job central to identity (G41P)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G41P (2005 vs 2009). effectsize's cohens_d() is the one
# in effect here because effectsize is attached after rstatix.
effect_size_G41P_05_09 <- effectsize::cohens_d(
  x = data_2005_G41P$G41P,
  y = data_2009_G41P$G41P,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41P_05_09

## Cohen's d |       95% CI
## ------------------------
## 0.32      | [0.24, 0.41]

H1: General Health (SPECIAL)

# Raw H1 response frequencies within each year (before any code is excluded).
Long_format_2005_2009 %>%
  group_by(year) %>%
  count(H1)

## # A tibble: 12 × 3
## # Groups:   year [2]
##     year    H1     n
##    <dbl> <dbl> <int>
##  1  2005     1   142
##  2  2005     2   222
##  3  2005     3   140
##  4  2005     4    33
##  5  2005     5     4
##  6  2005     9     1
##  7  2009     1   121
##  8  2009     2   234
##  9  2009     3   147
## 10  2009     4    37
## 11  2009     5     2
## 12  2009     9     1

# Respondents whose H1 is the code 9 in 2005 and in 2009, respectively
# (presumably a non-substantive response code; confirm against the codebook).
remove_id_H1_05 <- Long_format_2005_2009 %>%
  filter(year_new == -1 & H1 == 9) %>%
  pull(TAS_ID)
remove_id_H1_09 <- Long_format_2005_2009 %>%
  filter(year_new == 0 & H1 == 9) %>%
  pull(TAS_ID)
# Each wave keeps its own non-9 rows and also drops respondents flagged in the
# other wave, so both samples cover the same people.
data_2005_H1 <- Long_format_2005_2009 %>%
  filter(year_new == -1, H1 != 9, !(TAS_ID %in% remove_id_H1_09))
data_2009_H1 <- Long_format_2005_2009 %>%
  filter(year_new == 0, H1 != 9, !(TAS_ID %in% remove_id_H1_05))

# Paired t-test of H1, 2005 versus 2009 (positional pairing of the two vectors).
H1_t_test_05_09 <- t.test(
  x = data_2005_H1$H1,
  y = data_2009_H1$H1,
  paired = TRUE
)
H1_t_test_05_09

## 
##  Paired t-test
## 
## data:  data_2005_H1$H1 and data_2009_H1$H1
## t = -1.3779, df = 539, p-value = 0.1688
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.13924890  0.02443409
## sample estimates:
## mean difference 
##     -0.05740741

# H1 frequencies per year for respondents who keep rows in two distinct years
# after H1 == 9 rows are removed.
Long_format_2005_2009 %>%
  filter(H1 != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(H1)

## # A tibble: 10 × 3
## # Groups:   year [2]
##     year    H1     n
##    <dbl> <dbl> <int>
##  1  2005     1   142
##  2  2005     2   222
##  3  2005     3   139
##  4  2005     4    33
##  5  2005     5     4
##  6  2009     1   121
##  7  2009     2   233
##  8  2009     3   147
##  9  2009     4    37
## 10  2009     5     2

# Rows per year in the H1 analysis sample.
Long_format_2005_2009 %>%
  filter(H1 != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2005   540
## 2  2009   540

# Yearly mean of H1 over the H1 analysis sample.
mean_H1_0509 <- Long_format_2005_2009 %>%
  filter(H1 != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(average_H1 = mean(H1, na.rm = TRUE)) %>%
  ungroup()
mean_H1_0509

## # A tibble: 2 × 2
##    year average_H1
##   <dbl>      <dbl>
## 1  2005       2.14
## 2  2009       2.20

# Yearly standard deviation of H1 over the H1 analysis sample.
sd_H1_0509 <- Long_format_2005_2009 %>%
  filter(H1 != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_H1 = sd(H1, na.rm = TRUE)) %>%
  ungroup()
sd_H1_0509

## # A tibble: 2 × 2
##    year sd_H1
##   <dbl> <dbl>
## 1  2005 0.902
## 2  2009 0.876

# Boxplot of H1 by year; the dashed crossbar marks each year's mean.
Long_format_2005_2009 %>%
  filter(H1 != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = H1, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of General Health (H1) in 2005 and 2009",
    x = "Year",
    y = "General Health (H1)"
  ) +
  scale_fill_manual(values = c("2005" = "skyblue", "2009" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for H1 (2005 vs 2009), using effectsize's cohens_d().
effect_size_H1_05_09 <- effectsize::cohens_d(
  x = data_2005_H1$H1,
  y = data_2009_H1$H1,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_H1_05_09

## Cohen's d |        95% CI
## -------------------------
## -0.06     | [-0.14, 0.03]

2009 & 2015

Step 6: t-test (2009 & 2015)

Filter the data (2009 & 2015)

# Build the 2009/2015 panel:
#  1. keep rows flagged TAS09 == 1 and TAS15 == 1;
#  2. merge the two identifier columns into a single TAS_ID;
#  3. recode year to -1 / 0 / 1 in year_new;
#  4. within each respondent, replace the placeholder ages 2033 and 2027 by the
#     age from the other wave shifted by 6 years;
#  5. drop rows whose age is still 100 or more;
#  6. keep respondents whose 2015 - 2009 age gap is strictly between 4 and 8.
Long_format_2009_2015 <- TAS_data_long_format_age %>%
  filter(TAS09 == 1, TAS15 == 1) %>%
  unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>%
  mutate(
    year_new = case_when(
      year == 2005 ~ -1,
      year == 2009 ~ 0,
      year == 2015 ~ 1
    )
  ) %>%
  group_by(TAS_ID) %>%
  mutate(
    Age_18_graduate = case_when(
      Age_18_graduate == 2033 ~ Age_18_graduate[year == 2009] + 6,
      Age_18_graduate == 2027 ~ Age_18_graduate[year == 2015] - 6,
      TRUE ~ Age_18_graduate
    )
  ) %>%
  ungroup() %>%
  filter(Age_18_graduate < 100) %>%
  group_by(TAS_ID) %>%
  mutate(
    age_difference = Age_18_graduate[year == 2015] - Age_18_graduate[year == 2009]
  ) %>%
  filter(age_difference < 8, age_difference > 4) %>%
  ungroup()

# Inspect the filtered 2009/2015 panel by eye; this call does not modify the data.
view(Long_format_2009_2015)

# Render the first rows of the panel (columns 1 to 43) as a formatted table.
Long_format_2009_2015[, 1:43] %>%
  head() %>%
  knitr::kable()

TAS	TAS05	TAS09	TAS15	TAS_ID	Gender	Individual is sample	Year ID Number	Sequence Number	Relationship to Head	Release Number	B5A	B5D	B6C	C2D	C2E	C2F	E1_1st_mention	E3	G1	G2_month	G2_year	G10	G11	G30A	G41A	G41B	G41C	G41H	G41P	H1	L7_1st_mention	Age_17_graduate	Age_18_graduate	year	age_difference
2	NA	1	1	4_39	2	2	13	3	60	3	4	5	4	6	7	5	1	0	1	5	2008	1	5	5	6	5	2	7	6	2	1	18	19	2009	6
2	NA	1	1	7_40	2	2	3836	2	22	3	2	2	7	7	3	4	6	5	1	6	2007	5	0	5	5	2	5	6	5	3	1	19	20	2009	6
2	NA	1	1	7_41	1	2	576	2	30	3	3	4	7	4	5	4	3	5	1	5	2009	5	0	7	5	6	5	7	5	2	1	17	18	2009	6
2	NA	1	1	10_34	2	2	3276	3	30	3	4	5	6	4	1	1	1	0	1	6	2008	1	5	7	7	5	4	7	5	2	2	18	19	2009	6
2	NA	1	1	14_31	2	2	713	1	10	3	5	5	7	4	4	4	1	0	1	6	2005	5	0	6	5	7	6	7	2	4	1	21	22	2009	6
2	NA	1	1	22_30	2	2	907	2	30	3	5	1	4	3	1	1	1	0	1	5	2006	1	1	7	6	6	6	6	6	1	2	20	21	2009	6

# Rows per survey year in the 2009/2015 panel.
Long_format_2009_2015 %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   515
## 2  2015   515

# Distribution of Age_18_graduate across both waves combined.
Long_format_2009_2015 %>%
  count(Age_18_graduate)

## # A tibble: 11 × 2
##    Age_18_graduate     n
##              <dbl> <int>
##  1              15     1
##  2              18   154
##  3              19   134
##  4              20   136
##  5              21    89
##  6              22     2
##  7              24   155
##  8              25   134
##  9              26   136
## 10              27    87
## 11              28     2

Age count - 2009

# Distribution of Age_18_graduate in the 2009 wave only.
Long_format_2009_2015 %>%
  filter(year == 2009) %>%
  count(Age_18_graduate)

## # A tibble: 6 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              15     1
## 2              18   154
## 3              19   134
## 4              20   136
## 5              21    88
## 6              22     2

Age count - 2015

# Distribution of Age_18_graduate in the 2015 wave only.
Long_format_2009_2015 %>%
  filter(year == 2015) %>%
  count(Age_18_graduate)

## # A tibble: 6 × 2
##   Age_18_graduate     n
##             <dbl> <int>
## 1              21     1
## 2              24   155
## 3              25   134
## 4              26   136
## 5              27    87
## 6              28     2

# Split the panel by wave: year_new == 0 is 2009, year_new == 1 is 2015.
# These unfiltered wave tables feed the paired tests of the non-SPECIAL items.
data_2009 <- Long_format_2009_2015 %>%
  filter(year_new == 0)
data_2015 <- Long_format_2009_2015 %>%
  filter(year_new == 1)

SPECIAL: B5A, B5D, G41A, G41C, G41P (2009 & 2015)

B5A: Responsibility for self (SPECIAL)

# Raw B5A response frequencies within each year (before any code is excluded).
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(B5A)

## # A tibble: 12 × 3
## # Groups:   year [2]
##     year   B5A     n
##    <dbl> <dbl> <int>
##  1  2009     1    34
##  2  2009     2    94
##  3  2009     3   113
##  4  2009     4   157
##  5  2009     5   116
##  6  2009     9     1
##  7  2015     1    12
##  8  2015     2    15
##  9  2015     3    35
## 10  2015     4    92
## 11  2015     5   359
## 12  2015     8     2

# Respondents whose B5A exceeds 7 in 2009 and in 2015, respectively
# (the raw table shows codes 8 and 9 there; presumably non-substantive codes).
remove_id_B5A_09 <- Long_format_2009_2015 %>%
  filter(year_new == 0 & B5A > 7) %>%
  pull(TAS_ID)
remove_id_B5A_15 <- Long_format_2009_2015 %>%
  filter(year_new == 1 & B5A > 7) %>%
  pull(TAS_ID)
# Each wave keeps rows with B5A below 7 and drops respondents flagged in the
# other wave, so both samples cover the same people.
data_2009_B5A <- Long_format_2009_2015 %>%
  filter(year_new == 0, B5A < 7, !(TAS_ID %in% remove_id_B5A_15))
data_2015_B5A <- Long_format_2009_2015 %>%
  filter(year_new == 1, B5A < 7, !(TAS_ID %in% remove_id_B5A_09))

# Paired t-test of B5A, 2009 versus 2015 (positional pairing of the two vectors).
B5A_t_test_09_15 <- t.test(
  x = data_2009_B5A$B5A,
  y = data_2015_B5A$B5A,
  paired = TRUE
)
B5A_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009_B5A$B5A and data_2015_B5A$B5A
## t = -17.111, df = 511, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.1823174 -0.9387764
## sample estimates:
## mean difference 
##       -1.060547

# B5A frequencies per year for respondents who keep rows in two distinct years
# after rows with B5A of 7 or more are removed.
Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(B5A)

## # A tibble: 10 × 3
## # Groups:   year [2]
##     year   B5A     n
##    <dbl> <dbl> <int>
##  1  2009     1    34
##  2  2009     2    94
##  3  2009     3   111
##  4  2009     4   157
##  5  2009     5   116
##  6  2015     1    12
##  7  2015     2    15
##  8  2015     3    35
##  9  2015     4    91
## 10  2015     5   359

# Rows per year in the B5A analysis sample.
Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   512
## 2  2015   512

# Yearly mean of B5A over the B5A analysis sample.
mean_B5A_0915 <- Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(average_B5A = mean(B5A, na.rm = TRUE)) %>%
  ungroup()
mean_B5A_0915

## # A tibble: 2 × 2
##    year average_B5A
##   <dbl>       <dbl>
## 1  2009        3.44
## 2  2015        4.50

# Yearly standard deviation of B5A over the B5A analysis sample.
sd_B5A_0915 <- Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_B5A = sd(B5A, na.rm = TRUE)) %>%
  ungroup()
sd_B5A_0915

## # A tibble: 2 × 2
##    year sd_B5A
##   <dbl>  <dbl>
## 1  2009  1.21 
## 2  2015  0.919

# Boxplot of B5A by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  filter(B5A < 7) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = B5A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Responsibility for Self (B5A) in 2009 and 2015",
    x = "Year",
    y = "Responsibility for Self (B5A)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for B5A (2009 vs 2015), using effectsize's cohens_d().
effect_size_B5A_09_15 <- effectsize::cohens_d(
  x = data_2009_B5A$B5A,
  y = data_2015_B5A$B5A,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_B5A_09_15

## Cohen's d |         95% CI
## --------------------------
## -0.76     | [-0.85, -0.66]

B5D: Managing own money (SPECIAL)

# Raw B5D response frequencies within each year (before any code is excluded).
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(B5D)

## # A tibble: 11 × 3
## # Groups:   year [2]
##     year   B5D     n
##    <dbl> <dbl> <int>
##  1  2009     1    16
##  2  2009     2    15
##  3  2009     3    41
##  4  2009     4   123
##  5  2009     5   319
##  6  2009     9     1
##  7  2015     1     4
##  8  2015     2    12
##  9  2015     3    19
## 10  2015     4    43
## 11  2015     5   437

# Build the paired B5D samples for 2009 and 2015.
#
# Fix: the 2009 sample was previously filtered on B5A != 9 instead of B5D. That
# kept the respondent coded B5D == 9 in the 2009 vector and dropped a different
# respondent, so the positional pairs fed to t.test() / cohens_d() were
# misaligned. Any B5D t-test or effect-size output produced with the old filter
# must be regenerated.
#
# Respondents coded 9 on B5D in either wave (presumably a non-substantive
# response code; confirm against the codebook) are removed from BOTH waves.
remove_id_B5D <- Long_format_2009_2015 %>%
  filter(year_new %in% c(0, 1), B5D == 9) %>%
  pull(TAS_ID)
# Sort by TAS_ID so row i refers to the same respondent in both samples.
data_2009_B5D <- Long_format_2009_2015 %>%
  filter(year_new == 0, !(TAS_ID %in% remove_id_B5D)) %>%
  arrange(TAS_ID)
data_2015_B5D <- Long_format_2009_2015 %>%
  filter(year_new == 1, !(TAS_ID %in% remove_id_B5D)) %>%
  arrange(TAS_ID)
# Fail loudly if the two samples are not aligned respondent by respondent.
stopifnot(identical(data_2009_B5D$TAS_ID, data_2015_B5D$TAS_ID))

# B5D frequencies per year for respondents who keep rows in two distinct years
# after B5D == 9 rows are removed.
Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(B5D)

## # A tibble: 10 × 3
## # Groups:   year [2]
##     year   B5D     n
##    <dbl> <dbl> <int>
##  1  2009     1    16
##  2  2009     2    15
##  3  2009     3    41
##  4  2009     4   123
##  5  2009     5   319
##  6  2015     1     4
##  7  2015     2    12
##  8  2015     3    18
##  9  2015     4    43
## 10  2015     5   437

# Rows per year in the B5D analysis sample.
Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   514
## 2  2015   514

# Paired t-test of B5D, 2009 versus 2015 (positional pairing of the two vectors).
B5D_t_test_09_15 <- t.test(
  x = data_2009_B5D$B5D,
  y = data_2015_B5D$B5D,
  paired = TRUE
)
B5D_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009_B5D$B5D and data_2015_B5D$B5D
## t = -6.8478, df = 513, p-value = 2.151e-11
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.445656 -0.246951
## sample estimates:
## mean difference 
##      -0.3463035

# Yearly mean of B5D over the B5D analysis sample.
mean_B5D_0915 <- Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(average_B5D = mean(B5D, na.rm = TRUE)) %>%
  ungroup()
mean_B5D_0915

## # A tibble: 2 × 2
##    year average_B5D
##   <dbl>       <dbl>
## 1  2009        4.39
## 2  2015        4.75

# Yearly standard deviation of B5D over the B5D analysis sample.
sd_B5D_0915 <- Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_B5D = sd(B5D, na.rm = TRUE)) %>%
  ungroup()
sd_B5D_0915

## # A tibble: 2 × 2
##    year sd_B5D
##   <dbl>  <dbl>
## 1  2009  0.974
## 2  2015  0.703

# Boxplot of B5D by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  filter(B5D != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = B5D, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Managing own money (B5D) in 2009 and 2015",
    x = "Year",
    y = "Managing own money (B5D)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for B5D (2009 vs 2015), using effectsize's cohens_d().
effect_size_B5D_09_15 <- effectsize::cohens_d(
  x = data_2009_B5D$B5D,
  y = data_2015_B5D$B5D,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_B5D_09_15

## Cohen's d |         95% CI
## --------------------------
## -0.30     | [-0.39, -0.21]

B6C: Money management skills

# Paired t-test of B6C, 2009 versus 2015, on the full wave tables.
B6C_t_test_09_15 <- t.test(
  x = data_2009$B6C,
  y = data_2015$B6C,
  paired = TRUE
)
B6C_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$B6C and data_2015$B6C
## t = 1.5235, df = 514, p-value = 0.1282
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.02586079  0.20450156
## sample estimates:
## mean difference 
##      0.08932039

# B6C response frequencies within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(B6C)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   B6C     n
##    <dbl> <dbl> <int>
##  1  2009     1     4
##  2  2009     2    11
##  3  2009     3    17
##  4  2009     4    62
##  5  2009     5   154
##  6  2009     6   139
##  7  2009     7   128
##  8  2015     1     4
##  9  2015     2     7
## 10  2015     3    27
## 11  2015     4    74
## 12  2015     5   140
## 13  2015     6   157
## 14  2015     7   106

# Yearly mean of B6C (missing values ignored).
mean_B6C_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_B6C = mean(B6C, na.rm = TRUE)) %>%
  ungroup()
mean_B6C_0915

## # A tibble: 2 × 2
##    year average_B6C
##   <dbl>       <dbl>
## 1  2009        5.49
## 2  2015        5.40

# Yearly standard deviation of B6C (missing values ignored).
sd_B6C_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_B6C = sd(B6C, na.rm = TRUE)) %>%
  ungroup()
sd_B6C_0915

## # A tibble: 2 × 2
##    year sd_B6C
##   <dbl>  <dbl>
## 1  2009   1.27
## 2  2015   1.25

# Boxplot of B6C by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = B6C, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Money management skills (B6C) in 2009 and 2015",
    x = "Year",
    y = "Money management skills (B6C)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for B6C (2009 vs 2015), using effectsize's cohens_d().
effect_size_B6C_09_15 <- effectsize::cohens_d(
  x = data_2009$B6C,
  y = data_2015$B6C,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_B6C_09_15

## Cohen's d |        95% CI
## -------------------------
## 0.07      | [-0.02, 0.15]

C2D: Worry about expenses

# Paired t-test of C2D, 2009 versus 2015, on the full wave tables.
C2D_t_test_09_15 <- t.test(
  x = data_2009$C2D,
  y = data_2015$C2D,
  paired = TRUE
)
C2D_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$C2D and data_2015$C2D
## t = 3.2447, df = 514, p-value = 0.001252
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1256330 0.5112602
## sample estimates:
## mean difference 
##       0.3184466

# C2D response frequencies within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(C2D)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2D     n
##    <dbl> <dbl> <int>
##  1  2009     1    77
##  2  2009     2    97
##  3  2009     3    65
##  4  2009     4    89
##  5  2009     5    79
##  6  2009     6    49
##  7  2009     7    59
##  8  2015     1    98
##  9  2015     2    99
## 10  2015     3    84
## 11  2015     4    78
## 12  2015     5    73
## 13  2015     6    46
## 14  2015     7    37

# Yearly mean of C2D (missing values ignored).
mean_C2D_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_C2D = mean(C2D, na.rm = TRUE)) %>%
  ungroup()
mean_C2D_0915

## # A tibble: 2 × 2
##    year average_C2D
##   <dbl>       <dbl>
## 1  2009        3.74
## 2  2015        3.42

# Yearly standard deviation of C2D (missing values ignored).
sd_C2D_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_C2D = sd(C2D, na.rm = TRUE)) %>%
  ungroup()
sd_C2D_0915

## # A tibble: 2 × 2
##    year sd_C2D
##   <dbl>  <dbl>
## 1  2009   1.93
## 2  2015   1.86

# Boxplot of C2D by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = C2D, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Worry about expenses (C2D) in 2009 and 2015",
    x = "Year",
    y = "Worry about expenses (C2D)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for C2D (2009 vs 2015), using effectsize's cohens_d().
effect_size_C2D_09_15 <- effectsize::cohens_d(
  x = data_2009$C2D,
  y = data_2015$C2D,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_C2D_09_15

## Cohen's d |       95% CI
## ------------------------
## 0.14      | [0.06, 0.23]

C2E: Worry about future job

# Paired t-test of C2E, 2009 versus 2015, on the full wave tables.
C2E_t_test_09_15 <- t.test(
  x = data_2009$C2E,
  y = data_2015$C2E,
  paired = TRUE
)
C2E_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$C2E and data_2015$C2E
## t = 5.1874, df = 514, p-value = 3.073e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.2907333 0.6451890
## sample estimates:
## mean difference 
##       0.4679612

# C2E response frequencies within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(C2E)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2E     n
##    <dbl> <dbl> <int>
##  1  2009     1    89
##  2  2009     2    89
##  3  2009     3    77
##  4  2009     4    86
##  5  2009     5    76
##  6  2009     6    36
##  7  2009     7    62
##  8  2015     1   109
##  9  2015     2   111
## 10  2015     3    96
## 11  2015     4    74
## 12  2015     5    67
## 13  2015     6    25
## 14  2015     7    33

# Yearly mean of C2E (missing values ignored).
mean_C2E_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_C2E = mean(C2E, na.rm = TRUE)) %>%
  ungroup()
mean_C2E_0915

## # A tibble: 2 × 2
##    year average_C2E
##   <dbl>       <dbl>
## 1  2009        3.63
## 2  2015        3.17

# Yearly standard deviation of C2E (missing values ignored).
sd_C2E_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_C2E = sd(C2E, na.rm = TRUE)) %>%
  ungroup()
sd_C2E_0915

## # A tibble: 2 × 2
##    year sd_C2E
##   <dbl>  <dbl>
## 1  2009   1.94
## 2  2015   1.78

# Boxplot of C2E by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = C2E, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Worry about future job (C2E) in 2009 and 2015",
    x = "Year",
    y = "Worry about future job (C2E)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for C2E (2009 vs 2015), using effectsize's cohens_d().
effect_size_C2E_09_15 <- effectsize::cohens_d(
  x = data_2009$C2E,
  y = data_2015$C2E,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_C2E_09_15

## Cohen's d |       95% CI
## ------------------------
## 0.23      | [0.14, 0.32]

C2F: Discouraged about future

# Paired t-test of C2F, 2009 versus 2015, on the full wave tables.
C2F_t_test_09_15 <- t.test(
  x = data_2009$C2F,
  y = data_2015$C2F,
  paired = TRUE
)
C2F_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$C2F and data_2015$C2F
## t = 2.8413, df = 514, p-value = 0.004672
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.07009728 0.38427165
## sample estimates:
## mean difference 
##       0.2271845

# C2F response frequencies within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(C2F)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year   C2F     n
##    <dbl> <dbl> <int>
##  1  2009     1   108
##  2  2009     2   121
##  3  2009     3    96
##  4  2009     4    87
##  5  2009     5    47
##  6  2009     6    27
##  7  2009     7    29
##  8  2015     1   123
##  9  2015     2   144
## 10  2015     3    87
## 11  2015     4    71
## 12  2015     5    48
## 13  2015     6    21
## 14  2015     7    21

# Yearly mean of C2F (missing values ignored).
mean_C2F_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_C2F = mean(C2F, na.rm = TRUE)) %>%
  ungroup()
mean_C2F_0915

## # A tibble: 2 × 2
##    year average_C2F
##   <dbl>       <dbl>
## 1  2009        3.08
## 2  2015        2.85

# Yearly standard deviation of C2F (missing values ignored).
sd_C2F_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_C2F = sd(C2F, na.rm = TRUE)) %>%
  ungroup()
sd_C2F_0915

## # A tibble: 2 × 2
##    year sd_C2F
##   <dbl>  <dbl>
## 1  2009   1.73
## 2  2015   1.66

# Boxplot of C2F by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = C2F, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Discouraged about future (C2F) in 2009 and 2015",
    x = "Year",
    y = "Discouraged about future (C2F)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for C2F (2009 vs 2015), using effectsize's cohens_d().
effect_size_C2F_09_15 <- effectsize::cohens_d(
  x = data_2009$C2F,
  y = data_2015$C2F,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_C2F_09_15

## Cohen's d |       95% CI
## ------------------------
## 0.13      | [0.04, 0.21]

G30A: Likelihood of well-paying job

# Paired t-test of G30A, 2009 versus 2015, on the full wave tables.
G30A_t_test_09_15 <- t.test(
  x = data_2009$G30A,
  y = data_2015$G30A,
  paired = TRUE
)
G30A_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$G30A and data_2015$G30A
## t = 0.78506, df = 514, p-value = 0.4328
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.06126544  0.14281884
## sample estimates:
## mean difference 
##       0.0407767

# G30A response frequencies within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(G30A)

## # A tibble: 13 × 3
## # Groups:   year [2]
##     year  G30A     n
##    <dbl> <dbl> <int>
##  1  2009     1     3
##  2  2009     3     3
##  3  2009     4    25
##  4  2009     5   108
##  5  2009     6   171
##  6  2009     7   205
##  7  2015     1     3
##  8  2015     2     4
##  9  2015     3     4
## 10  2015     4    27
## 11  2015     5   112
## 12  2015     6   154
## 13  2015     7   211

# Yearly mean of G30A (missing values ignored).
mean_G30A_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_G30A = mean(G30A, na.rm = TRUE)) %>%
  ungroup()
mean_G30A_0915

## # A tibble: 2 × 2
##    year average_G30A
##   <dbl>        <dbl>
## 1  2009         6.04
## 2  2015         6.00

# Yearly standard deviation of G30A (missing values ignored).
sd_G30A_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_G30A = sd(G30A, na.rm = TRUE)) %>%
  ungroup()
sd_G30A_0915

## # A tibble: 2 × 2
##    year sd_G30A
##   <dbl>   <dbl>
## 1  2009    1.00
## 2  2015    1.09

# Boxplot of G30A by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = G30A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Likelihood of well-paying job (G30A) in 2009 and 2015",
    x = "Year",
    y = "Likelihood of well-paying job (G30A)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G30A (2009 vs 2015), using effectsize's cohens_d().
effect_size_G30A_09_15 <- effectsize::cohens_d(
  x = data_2009$G30A,
  y = data_2015$G30A,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G30A_09_15

## Cohen's d |        95% CI
## -------------------------
## 0.03      | [-0.05, 0.12]

G41A: Importance of job status (SPECIAL)

# Raw G41A response frequencies within each year (before any code is excluded).
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(G41A)

## # A tibble: 15 × 3
## # Groups:   year [2]
##     year  G41A     n
##    <dbl> <dbl> <int>
##  1  2009     1    30
##  2  2009     2    14
##  3  2009     3    39
##  4  2009     4    70
##  5  2009     5   128
##  6  2009     6    89
##  7  2009     7   144
##  8  2009     9     1
##  9  2015     1    54
## 10  2015     2    50
## 11  2015     3    50
## 12  2015     4    73
## 13  2015     5   115
## 14  2015     6    75
## 15  2015     7    98

# Respondents whose 2009 G41A answer is the code 9
# (presumably a non-substantive response code; confirm against the codebook).
remove_id_G41A <- Long_format_2009_2015 %>%
  filter(year_new == 0 & G41A == 9) %>%
  pull(TAS_ID)
# 2009 rows whose G41A is anything other than 9.
data_2009_G41A <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41A != 9)
# 2015 rows for every respondent not flagged above.
data_2015_G41A <- Long_format_2009_2015 %>%
  filter(year_new == 1, !(TAS_ID %in% remove_id_G41A))

# G41A frequencies per year for respondents who keep rows in two distinct years
# after G41A == 9 rows are removed.
Long_format_2009_2015 %>%
  filter(G41A != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(G41A)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41A     n
##    <dbl> <dbl> <int>
##  1  2009     1    30
##  2  2009     2    14
##  3  2009     3    39
##  4  2009     4    70
##  5  2009     5   128
##  6  2009     6    89
##  7  2009     7   144
##  8  2015     1    54
##  9  2015     2    50
## 10  2015     3    50
## 11  2015     4    73
## 12  2015     5   115
## 13  2015     6    74
## 14  2015     7    98

# Rows per year in the G41A analysis sample.
Long_format_2009_2015 %>%
  filter(G41A != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   514
## 2  2015   514

# Paired t-test of G41A, 2009 versus 2015 (positional pairing of the two vectors).
G41A_t_test_09_15 <- t.test(
  x = data_2009_G41A$G41A,
  y = data_2015_G41A$G41A,
  paired = TRUE
)
G41A_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009_G41A$G41A and data_2015_G41A$G41A
## t = 8.2779, df = 513, p-value = 1.093e-15
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.4985536 0.8088394
## sample estimates:
## mean difference 
##       0.6536965

# Yearly mean of G41A over the G41A analysis sample.
mean_G41A_0915 <- Long_format_2009_2015 %>%
  filter(G41A != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(average_G41A = mean(G41A, na.rm = TRUE)) %>%
  ungroup()
mean_G41A_0915

## # A tibble: 2 × 2
##    year average_G41A
##   <dbl>        <dbl>
## 1  2009         5.13
## 2  2015         4.48

# Yearly standard deviation of G41A over the G41A analysis sample.
sd_G41A_0915 <- Long_format_2009_2015 %>%
  filter(G41A != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  summarise(sd_G41A = sd(G41A, na.rm = TRUE)) %>%
  ungroup()
sd_G41A_0915

## # A tibble: 2 × 2
##    year sd_G41A
##   <dbl>   <dbl>
## 1  2009    1.70
## 2  2015    1.93

# Boxplot of G41A by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  filter(G41A != 9) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(aes(x = factor(year), y = G41A, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job status (G41A) in 2009 and 2015",
    x = "Year",
    y = "Importance of job status (G41A)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G41A (2009 vs 2015), using effectsize's cohens_d().
effect_size_G41A_09_15 <- effectsize::cohens_d(
  x = data_2009_G41A$G41A,
  y = data_2015_G41A$G41A,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41A_09_15

## Cohen's d |       95% CI
## ------------------------
## 0.37      | [0.28, 0.45]

G41B: Importance of decision-making

# Paired t-test of G41B, 2009 versus 2015, on the full wave tables.
G41B_t_test_09_15 <- t.test(
  x = data_2009$G41B,
  y = data_2015$G41B,
  paired = TRUE
)
G41B_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$G41B and data_2015$G41B
## t = 4.075, df = 514, p-value = 5.329e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1246969 0.3568565
## sample estimates:
## mean difference 
##       0.2407767

# G41B response frequencies within each year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(G41B)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41B     n
##    <dbl> <dbl> <int>
##  1  2009     1     5
##  2  2009     2     4
##  3  2009     3    14
##  4  2009     4    45
##  5  2009     5   124
##  6  2009     6   166
##  7  2009     7   157
##  8  2015     1    16
##  9  2015     2     6
## 10  2015     3    15
## 11  2015     4    41
## 12  2015     5   161
## 13  2015     6   148
## 14  2015     7   128

# Yearly mean of G41B (missing values ignored).
mean_G41B_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_G41B = mean(G41B, na.rm = TRUE)) %>%
  ungroup()
mean_G41B_0915

## # A tibble: 2 × 2
##    year average_G41B
##   <dbl>        <dbl>
## 1  2009         5.73
## 2  2015         5.49

# Yearly standard deviation of G41B (missing values ignored).
sd_G41B_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_G41B = sd(G41B, na.rm = TRUE)) %>%
  ungroup()
sd_G41B_0915

## # A tibble: 2 × 2
##    year sd_G41B
##   <dbl>   <dbl>
## 1  2009    1.20
## 2  2015    1.36

# Boxplot of G41B by year; the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  ggplot(aes(x = factor(year), y = G41B, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of decision-making (G41B) in 2009 and 2015",
    x = "Year",
    y = "Importance of decision-making (G41B)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G41B (2009 vs 2015), using effectsize's cohens_d().
effect_size_G41B_09_15 <- effectsize::cohens_d(
  x = data_2009$G41B,
  y = data_2015$G41B,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

effect_size_G41B_09_15

## Cohen's d |       95% CI
## ------------------------
## 0.18      | [0.09, 0.27]

G41C: Importance of challenging work (SPECIAL)

# Raw G41C response frequencies within each year (before any code is excluded).
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(G41C)

## # A tibble: 15 × 3
## # Groups:   year [2]
##     year  G41C     n
##    <dbl> <dbl> <int>
##  1  2009     1     5
##  2  2009     2     6
##  3  2009     3    24
##  4  2009     4    50
##  5  2009     5   141
##  6  2009     6   156
##  7  2009     7   133
##  8  2015     0   388
##  9  2015     1     2
## 10  2015     2     2
## 11  2015     3     5
## 12  2015     4    14
## 13  2015     5    35
## 14  2015     6    38
## 15  2015     7    31

# Respondents whose 2015 G41C is the code 0
# (presumably "not asked / inapplicable"; confirm against the codebook).
remove_id_G41C <- Long_format_2009_2015 %>%
  filter(year_new == 1 & G41C == 0) %>%
  pull(TAS_ID)
# 2009 rows for every respondent not flagged above.
data_2009_G41C <- Long_format_2009_2015 %>%
  filter(year_new == 0, !(TAS_ID %in% remove_id_G41C))
# 2015 rows whose G41C is anything other than 0.
data_2015_G41C <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41C != 0)

# G41C frequencies per year for respondents who keep rows in two distinct years
# after G41C == 0 rows are removed.
Long_format_2009_2015 %>%
  filter(G41C != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  group_by(year) %>%
  count(G41C)

## # A tibble: 13 × 3
## # Groups:   year [2]
##     year  G41C     n
##    <dbl> <dbl> <int>
##  1  2009     2     2
##  2  2009     3     5
##  3  2009     4     9
##  4  2009     5    47
##  5  2009     6    35
##  6  2009     7    29
##  7  2015     1     2
##  8  2015     2     2
##  9  2015     3     5
## 10  2015     4    14
## 11  2015     5    35
## 12  2015     6    38
## 13  2015     7    31

# Rows per year in the G41C analysis sample.
Long_format_2009_2015 %>%
  filter(G41C != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   127
## 2  2015   127

# Paired t-test of G41C, 2009 versus 2015 (positional pairing of the two vectors).
G41C_t_test_09_15 <- t.test(
  x = data_2009_G41C$G41C,
  y = data_2015_G41C$G41C,
  paired = TRUE
)
G41C_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009_G41C$G41C and data_2015_G41C$G41C
## t = 0.3639, df = 126, p-value = 0.7165
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.2096807  0.3041689
## sample estimates:
## mean difference 
##      0.04724409

# Mean G41C per year among respondents with a non-zero value in both years.
mean_G41C_0915 <- Long_format_2009_2015 %>%
  filter(G41C != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(average_G41C = mean(G41C, na.rm = TRUE)) %>%
  ungroup()
mean_G41C_0915

## # A tibble: 2 × 2
##    year average_G41C
##   <dbl>        <dbl>
## 1  2009         5.54
## 2  2015         5.49

# Standard deviation of G41C per year among respondents with a non-zero value
# in both years.
sd_G41C_0915 <- Long_format_2009_2015 %>%
  filter(G41C != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(sd_G41C = sd(G41C, na.rm = TRUE)) %>%
  ungroup()
sd_G41C_0915

## # A tibble: 2 × 2
##    year sd_G41C
##   <dbl>   <dbl>
## 1  2009    1.13
## 2  2015    1.31

# Boxplot of G41C by year for respondents with a non-zero value in both years;
# the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  filter(G41C != 0) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = factor(year), y = G41C, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of challenging work (G41C) in 2009 and 2015",
    x = "Year",
    y = "Importance of challenging work (G41C)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G41C on the matched subsets. The function is
# namespace-qualified because rstatix (also attached) exports a cohens_d()
# with a different interface.
effect_size_G41C_09_15 <- effectsize::cohens_d(
  x = data_2009_G41C$G41C,
  y = data_2015_G41C$G41C,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

# Display the stored paired effect size for G41C (2009 vs 2015).
effect_size_G41C_09_15

## Cohen's d |        95% CI
## -------------------------
## 0.03      | [-0.14, 0.21]

G41H: Importance of healthcare benefits

# Paired t-test of G41H, 2009 vs 2015.
G41H_t_test_09_15 <- t.test(
  x = data_2009$G41H,
  y = data_2015$G41H,
  paired = TRUE
)
G41H_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$G41H and data_2015$G41H
## t = 4.9117, df = 514, p-value = 1.216e-06
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1852472 0.4322285
## sample estimates:
## mean difference 
##       0.3087379

# Frequency of every G41H response code within each survey year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(G41H)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41H     n
##    <dbl> <dbl> <int>
##  1  2009     1     2
##  2  2009     2     4
##  3  2009     3     4
##  4  2009     4    21
##  5  2009     5    53
##  6  2009     6   115
##  7  2009     7   316
##  8  2015     1    11
##  9  2015     2    13
## 10  2015     3    10
## 11  2015     4    23
## 12  2015     5    67
## 13  2015     6   117
## 14  2015     7   274

# Mean G41H per survey year (missing values ignored).
mean_G41H_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_G41H = mean(G41H, na.rm = TRUE)) %>%
  ungroup()
mean_G41H_0915

## # A tibble: 2 × 2
##    year average_G41H
##   <dbl>        <dbl>
## 1  2009         6.36
## 2  2015         6.05

# Standard deviation of G41H per survey year (missing values ignored).
sd_G41H_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_G41H = sd(G41H, na.rm = TRUE)) %>%
  ungroup()
sd_G41H_0915

## # A tibble: 2 × 2
##    year sd_G41H
##   <dbl>   <dbl>
## 1  2009    1.02
## 2  2015    1.40

# Boxplot of G41H by year; the dashed crossbar marks each year's mean.
ggplot(
  data = Long_format_2009_2015,
  mapping = aes(x = factor(year), y = G41H, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of healthcare benefits (G41H) in 2009 and 2015",
    x = "Year",
    y = "Importance of healthcare benefits (G41H)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G41H, 2009 vs 2015. The function is namespace-qualified
# because rstatix (also attached) exports a cohens_d() with a different
# interface.
effect_size_G41H_09_15 <- effectsize::cohens_d(
  x = data_2009$G41H,
  y = data_2015$G41H,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

# Display the stored paired effect size for G41H (2009 vs 2015).
effect_size_G41H_09_15

## Cohen's d |       95% CI
## ------------------------
## 0.22      | [0.13, 0.30]

G41P: Importance of job central to identity (SPECIAL)

# Frequency of every G41P response code within each survey year, so that
# codes outside the rating scale can be spotted before testing.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(G41P)

## # A tibble: 16 × 3
## # Groups:   year [2]
##     year  G41P     n
##    <dbl> <dbl> <int>
##  1  2009     1    24
##  2  2009     2    24
##  3  2009     3    46
##  4  2009     4   101
##  5  2009     5   112
##  6  2009     6   102
##  7  2009     7   104
##  8  2009     8     2
##  9  2015     1    25
## 10  2015     2    34
## 11  2015     3    42
## 12  2015     4    95
## 13  2015     5   119
## 14  2015     6    92
## 15  2015     7   107
## 16  2015     8     1

# Build the matched 2009/2015 samples for G41P.
#
# Code 8 appears in both waves in the count table and is excluded here as a
# non-rating (presumably a "don't know"-type code -- confirm against the
# codebook). A respondent with an 8 in either wave is dropped from BOTH waves:
# the wave's own 8s are removed by the G41P filter, and the other wave's 8s
# are removed through the ID lists.
remove_id_G41P_09 <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41P == 8) %>%
  pull(TAS_ID)
remove_id_G41P_15 <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41P == 8) %>%
  pull(TAS_ID)

# Both frames are sorted by TAS_ID so that row i of the 2009 data and row i of
# the 2015 data belong to the same respondent. The paired t-test and paired
# Cohen's d match observations purely by position, so this ordering must be
# explicit rather than inherited from the source file.
data_2009_G41P <- Long_format_2009_2015 %>%
  filter(year_new == 0, G41P != 8, !(TAS_ID %in% remove_id_G41P_15)) %>%
  arrange(TAS_ID)
data_2015_G41P <- Long_format_2009_2015 %>%
  filter(year_new == 1, G41P != 8, !(TAS_ID %in% remove_id_G41P_09)) %>%
  arrange(TAS_ID)

# Fail loudly if the two waves do not contain exactly the same respondents in
# the same order; a silent mismatch would invalidate every paired statistic.
stopifnot(identical(data_2009_G41P$TAS_ID, data_2015_G41P$TAS_ID))

# G41P response counts per year, restricted to respondents whose G41P value
# is not 8 in both survey years.
Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  count(G41P)

## # A tibble: 14 × 3
## # Groups:   year [2]
##     year  G41P     n
##    <dbl> <dbl> <int>
##  1  2009     1    24
##  2  2009     2    24
##  3  2009     3    46
##  4  2009     4   101
##  5  2009     5   112
##  6  2009     6   102
##  7  2009     7   103
##  8  2015     1    25
##  9  2015     2    33
## 10  2015     3    42
## 11  2015     4    95
## 12  2015     5   118
## 13  2015     6    92
## 14  2015     7   107

# Number of rows per year after keeping only respondents whose G41P value is
# not 8 in both years; the two counts should be equal.
Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  count(year)

## # A tibble: 2 × 2
##    year     n
##   <dbl> <int>
## 1  2009   512
## 2  2015   512

# Paired t-test of G41P, 2009 vs 2015, on the matched subsets.
G41P_t_test_09_15 <- t.test(
  x = data_2009_G41P$G41P,
  y = data_2015_G41P$G41P,
  paired = TRUE
)
G41P_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009_G41P$G41P and data_2015_G41P$G41P
## t = 0.44786, df = 511, p-value = 0.6544
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.1256782  0.1998970
## sample estimates:
## mean difference 
##      0.03710938

# Mean G41P per year among respondents whose value is not 8 in both years.
mean_G41P_0915 <- Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(average_G41P = mean(G41P, na.rm = TRUE)) %>%
  ungroup()
mean_G41P_0915

## # A tibble: 2 × 2
##    year average_G41P
##   <dbl>        <dbl>
## 1  2009         4.90
## 2  2015         4.86

# Standard deviation of G41P per year among respondents whose value is not 8
# in both years.
sd_G41P_0915 <- Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(sd_G41P = sd(G41P, na.rm = TRUE)) %>%
  ungroup()
sd_G41P_0915

## # A tibble: 2 × 2
##    year sd_G41P
##   <dbl>   <dbl>
## 1  2009    1.65
## 2  2015    1.70

# Boxplot of G41P by year for respondents whose value is not 8 in both years;
# the dashed crossbar marks each year's mean.
Long_format_2009_2015 %>%
  filter(G41P != 8) %>%
  group_by(TAS_ID) %>%
  filter(n_distinct(year) == 2) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = factor(year), y = G41P, fill = factor(year))) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of Importance of job central to identity (G41P) in 2009 and 2015",
    x = "Year",
    y = "Importance of job central to identity (G41P)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for G41P on the matched subsets. The function is
# namespace-qualified because rstatix (also attached) exports a cohens_d()
# with a different interface.
effect_size_G41P_09_15 <- effectsize::cohens_d(
  x = data_2009_G41P$G41P,
  y = data_2015_G41P$G41P,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

# Display the stored paired effect size for G41P (2009 vs 2015).
effect_size_G41P_09_15

## Cohen's d |        95% CI
## -------------------------
## 0.02      | [-0.07, 0.11]

H1: General Health

# Paired t-test of H1, 2009 vs 2015.
H1_t_test_09_15 <- t.test(
  x = data_2009$H1,
  y = data_2015$H1,
  paired = TRUE
)
H1_t_test_09_15

## 
##  Paired t-test
## 
## data:  data_2009$H1 and data_2015$H1
## t = -3.1495, df = 514, p-value = 0.001731
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.22386069 -0.05186746
## sample estimates:
## mean difference 
##      -0.1378641

# Frequency of every H1 response code within each survey year.
Long_format_2009_2015 %>%
  group_by(year) %>%
  count(H1)

## # A tibble: 10 × 3
## # Groups:   year [2]
##     year    H1     n
##    <dbl> <dbl> <int>
##  1  2009     1   128
##  2  2009     2   232
##  3  2009     3   121
##  4  2009     4    28
##  5  2009     5     6
##  6  2015     1    92
##  7  2015     2   250
##  8  2015     3   123
##  9  2015     4    43
## 10  2015     5     7

# Mean H1 per survey year (missing values ignored).
mean_H1_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(average_H1 = mean(H1, na.rm = TRUE)) %>%
  ungroup()
mean_H1_0915

## # A tibble: 2 × 2
##    year average_H1
##   <dbl>      <dbl>
## 1  2009       2.13
## 2  2015       2.27

# Standard deviation of H1 per survey year (missing values ignored).
sd_H1_0915 <- Long_format_2009_2015 %>%
  group_by(year) %>%
  summarise(sd_H1 = sd(H1, na.rm = TRUE)) %>%
  ungroup()
sd_H1_0915

## # A tibble: 2 × 2
##    year sd_H1
##   <dbl> <dbl>
## 1  2009 0.889
## 2  2015 0.896

# Boxplot of H1 by year; the dashed crossbar marks each year's mean.
ggplot(
  data = Long_format_2009_2015,
  mapping = aes(x = factor(year), y = H1, fill = factor(year))
) +
  geom_boxplot() +
  stat_summary(
    fun = "mean",
    geom = "crossbar",
    width = 0.75,
    color = "black",
    size = 0.2,
    linetype = "dashed"
  ) +
  labs(
    title = "Boxplot of General Health (H1) in 2009 and 2015",
    x = "Year",
    y = "General Health (H1)"
  ) +
  scale_fill_manual(values = c("2009" = "skyblue", "2015" = "salmon")) +
  theme_minimal()

# Paired Cohen's d for H1, 2009 vs 2015. The function is namespace-qualified
# because rstatix (also attached) exports a cohens_d() with a different
# interface.
effect_size_H1_09_15 <- effectsize::cohens_d(
  x = data_2009$H1,
  y = data_2015$H1,
  paired = TRUE
)

## For paired samples, 'repeated_measures_d()' provides more options.

# Display the stored paired effect size for H1 (2009 vs 2015).
effect_size_H1_09_15

## Cohen's d |         95% CI
## --------------------------
## -0.14     | [-0.23, -0.05]

TAS_ttest

Sher May

2024-11-01

TAS Descriptive statistics

Step 1: Loading Packages

Step 2: Import the data

Step 3: Preview the data

2005 & 2009

Step 5: t-test (2005 & 2009)

B5A: Responsibility for self

B5D: Managing own money

B6C: Money management skills

C2D: Worry about expenses

C2E: Worry about future job

C2F: Discouraged about future

G30A: Likelihood of well-paying job (SPECIAL)

G41A: Importance of job status

G41B: Importance of decision-making

G41C: Importance of challenging work

G41H: Importance of healthcare benefits

G41P: Importance of job central to identity (SPECIAL)

H1: General Health (SPECIAL)

2009 & 2015

Step 6: t-test (2009 & 2015)

B5A: Responsibility for self (SPECIAL)

B5D: Managing own money (SPECIAL)

B6C: Money management skills

C2D: Worry about expenses

C2E: Worry about future job

C2F: Discouraged about future

G30A: Likelihood of well-paying job

G41A: Importance of job status (SPECIAL)

G41B: Importance of decision-making

G41C: Importance of challenging work (SPECIAL)

G41H: Importance of healthcare benefits

G41P: Importance of job central to identity (SPECIAL)

H1: General Health