TAS_Anova

TAS Descriptive statistics

We will be going through

Step 1: Loading Packages

library(tidyverse)
library(readxl)
library(ggplot2)
library (reshape2)
library(writexl)
library (lmerTest)
library(lme4)
library(dplyr)
library(ggpubr)
library(rstatix)
library(effectsize)
library(multcomp)

Step 2: Import the data

TAS_data_long_format_age <- read_excel("TAS_data_long_format_age.xlsx") %>% unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>% mutate(year_new = case_when(year == 2005 ~ -1, year == 2009 ~ 0,year == 2015 ~ 1))

Step 3: Preview the data

view(TAS_data_long_format_age)

head(TAS_data_long_format_age)

## # A tibble: 6 × 42
##     TAS TAS05 TAS09 TAS15 TAS_ID Gender `Individual is sample` `Year ID Number`
##   <dbl> <dbl> <dbl> <dbl> <chr>   <dbl>                  <dbl>            <dbl>
## 1     2     1     1    NA 4_180       2                      3              771
## 2     2     1     1    NA 5_32        2                      2              624
## 3     2     1     1    NA 6_34        1                      2             1202
## 4     2     1     1    NA 14_30       1                      2              736
## 5     1     1    NA    NA 18_38       2                      2             5647
## 6     2     1     1    NA 47_34       2                      2             2516
## # ℹ 34 more variables: `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## #   `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## #   C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## #   E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## #   G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## #   G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>,
## #   L7_1st_mention <dbl>, L7_2nd_mention <dbl>, L7_3rd_mention <dbl>, …

TAS_data_long_format_age %>% count(year)

## # A tibble: 3 × 2
##    year     n
##   <dbl> <int>
## 1  2005   745
## 2  2009  1554
## 3  2015  1641

TAS_data_long_format_age %>% count(Age_18_graduate)

## # A tibble: 16 × 2
##    Age_18_graduate     n
##              <dbl> <int>
##  1              18   465
##  2              19   504
##  3              20   366
##  4              21   287
##  5              22   331
##  6              23   256
##  7              24   208
##  8              25   121
##  9              26    33
## 10              27    16
## 11              28     1
## 12              29     1
## 13              32     1
## 14            2023   146
## 15            2027   264
## 16            2033   940

TAS_data_long_format_age %>% group_by(year) %>%  count(Age_18_graduate) %>% ungroup()

## # A tibble: 32 × 3
##     year Age_18_graduate     n
##    <dbl>           <dbl> <int>
##  1  2005              18   179
##  2  2005              19   169
##  3  2005              20   164
##  4  2005              21    83
##  5  2005              22     2
##  6  2005              23     2
##  7  2005            2023   146
##  8  2009              18   195
##  9  2009              19   172
## 10  2009              20   166
## # ℹ 22 more rows

#One-Way ANOVA

TAS_filtered_age <- TAS_data_long_format_age %>% group_by(TAS_ID) %>% mutate(Age_18_graduate = case_when(year == 2005 & Age_18_graduate == 2023 & any(year == 2009) ~ first(Age_18_graduate[year == 2009]) - 4, year == 2009 & Age_18_graduate == 2027 & any(year == 2005) ~ first(Age_18_graduate[year == 2005]) + 4, year == 2009 & Age_18_graduate == 2027 & any(year == 2015) ~ first(Age_18_graduate[year == 2015]) - 6, year == 2015 & Age_18_graduate == 2033 & any(year == 2009) ~ first(Age_18_graduate[year == 2009]) + 6,Age_18_graduate >= 1 & Age_18_graduate <= 50 ~ Age_18_graduate, TRUE ~ Age_18_graduate)) %>% ungroup() %>%  filter(Age_18_graduate == 18 | Age_18_graduate == 19) %>% mutate(year_new = factor(year_new, levels = c(-1, 0, 1), labels = c("2005", "2009", "2015")))

view(TAS_filtered_age)

knitr::kable(head(TAS_filtered_age[, 1:42]))

TAS	TAS05	TAS09	TAS15	TAS_ID	Gender	Individual is sample	Year ID Number	Sequence Number	Relationship to Head	Release Number	B5A	B5D	B6C	C2D	C2E	C2F	E1_1st_mention	E1_2nd_mention	E3	G1	G2_month	G2_year	G10	G11	G30A	G41A	G41B	G41C	G41H	G41P	H1	L7_1st_mention	Age_17_graduate	Age_18_graduate	year	year_new
1	1	NA	NA	18_38	2	2	5647	3	98	5	3	4	3	4	2	2	3	7	5	1	6	2004	1	5	6	5	5	5	7	5	2	1	18	19	2005	2005
2	1	1	NA	47_34	2	2	2516	3	30	5	4	5	6	4	5	2	1	0	0	1	5	2005	5	0	6	3	6	4	7	4	1	1	17	18	2005	2005
2	1	1	NA	53_36	2	2	1616	3	30	5	4	5	7	4	1	1	6	0	1	1	6	2005	1	5	7	7	7	5	7	6	2	1	17	18	2005	2005
2	1	1	NA	79_32	2	2	6520	2	30	5	3	4	6	7	5	3	1	7	0	1	5	2004	1	1	0	7	7	6	7	4	1	1	18	19	2005	2005
2	1	1	NA	88_35	1	2	3411	2	30	5	2	5	7	3	1	2	1	0	0	1	5	2005	1	1	7	2	6	5	6	7	2	1	17	18	2005	2005
2	1	1	NA	89_34	2	2	4527	3	30	5	2	4	5	2	3	1	3	7	5	1	5	2005	1	1	7	5	6	4	7	5	1	1	17	18	2005	2005

TAS_filtered_age %>% group_by(year) %>% count(Age_18_graduate)

## # A tibble: 6 × 3
## # Groups:   year [3]
##    year Age_18_graduate     n
##   <dbl>           <dbl> <int>
## 1  2005              18   185
## 2  2005              19   171
## 3  2009              18   196
## 4  2009              19   173
## 5  2015              18    91
## 6  2015              19   163

B5A: Responsibility for self

TAS_filtered_age %>% count(B5A)

## # A tibble: 6 × 2
##     B5A     n
##   <dbl> <int>
## 1     1    48
## 2     2   202
## 3     3   243
## 4     4   318
## 5     5   167
## 6     9     1

B5A_filtered <- TAS_filtered_age %>% filter(B5A < 8) %>% filter(B5A >0)

B5A_Anova <- aov(B5A ~ year_new, data = B5A_filtered)
summary(B5A_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    4.7   2.336   1.826  0.162
## Residuals   975 1247.2   1.279

B5A_pairwise <-TukeyHSD(B5A_Anova)
B5A_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = B5A ~ year_new, data = B5A_filtered)
## 
## $year_new
##                  diff         lwr       upr     p adj
## 2009-2005 -0.09379580 -0.29115264 0.1035610 0.5046582
## 2015-2005  0.08028842 -0.13776097 0.2983378 0.6629646
## 2015-2009  0.17408422 -0.04247977 0.3906482 0.1430000

B5A_glht <- glht(B5A_Anova, linfct = mcp(year_new = "Tukey")) 
summary(B5A_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = B5A ~ year_new, data = B5A_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.09380    0.08408  -1.116    0.504
## 2015 - 2005 == 0  0.08029    0.09289   0.864    0.663
## 2015 - 2009 == 0  0.17408    0.09226   1.887    0.143
## (Adjusted p values reported -- single-step method)

B5A_pairwise_t_test <- pairwise.t.test(B5A_filtered$B5A, B5A_filtered$year_new, p.adjust.method = "BH")
print(B5A_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  B5A_filtered$B5A and B5A_filtered$year_new 
## 
##      2005 2009
## 2009 0.39 -   
## 2015 0.39 0.18
## 
## P value adjustment method: BH

B5A_means <- B5A_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(B5A),SD = sd(B5A),Count = n())
print(B5A_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      3.38  1.11   356
## 2 2009      3.28  1.15   368
## 3 2015      3.46  1.14   254

B5A_filtered$predicted_B5A <- predict(B5A_Anova)
B5A_filtered$year_factor_B5A <- factor(B5A_filtered$year)

ggplot(B5A_filtered, aes(x = Age_18_graduate, y = B5A, color = year_factor_B5A)) + geom_point(aes(shape = year_factor_B5A), alpha = 0.5) + geom_line(aes(y = predicted_B5A), size = 1) +  labs(title = "Responsibility for Self (B5A) by Age and Year",x = "Age", y = "Responsibility for Self (B5A)", color = "Year", shape = "Year") + theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(B5A_filtered, aes(x = year_factor_B5A, y = B5A, fill = year_factor_B5A)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Responsibility for Self (B5A) in 2005 and 2015",x = "Year", y = "Responsibility for Self (B5A)", fill = "Year") + theme_minimal()

B5D: Managing own money

TAS_filtered_age %>% count(B5D)

## # A tibble: 6 × 2
##     B5D     n
##   <dbl> <int>
## 1     1    32
## 2     2    34
## 3     3    95
## 4     4   249
## 5     5   568
## 6     9     1

B5D_filtered <- TAS_filtered_age %>% filter(B5D < 8) %>% filter(B5D >0)

B5D_Anova <- aov(B5D ~ year_new, data = B5D_filtered)
summary(B5D_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    0.0  0.0042   0.004  0.996
## Residuals   975  989.4  1.0147

B5D_pairwise <-TukeyHSD(B5D_Anova)
B5D_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = B5D ~ year_new, data = B5D_filtered)
## 
## $year_new
##                    diff        lwr       upr     p adj
## 2009-2005  0.0005190523 -0.1752583 0.1762964 0.9999735
## 2015-2005 -0.0063921083 -0.2005994 0.1878152 0.9967147
## 2015-2009 -0.0069111606 -0.1997955 0.1859732 0.9961078

B5D_glht <- glht(B5D_Anova, linfct = mcp(year_new = "Tukey")) 
summary(B5D_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = B5D ~ year_new, data = B5D_filtered)
## 
## Linear Hypotheses:
##                    Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0  0.0005191  0.0748852   0.007    1.000
## 2015 - 2005 == 0 -0.0063921  0.0827368  -0.077    0.997
## 2015 - 2009 == 0 -0.0069112  0.0821731  -0.084    0.996
## (Adjusted p values reported -- single-step method)

B5D_pairwise_t_test <- pairwise.t.test(B5D_filtered$B5D, B5D_filtered$year_new, p.adjust.method = "BH")
print(B5D_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  B5D_filtered$B5D and B5D_filtered$year_new 
## 
##      2005 2009
## 2009 0.99 -   
## 2015 0.99 0.99
## 
## P value adjustment method: BH

B5D_means <- B5D_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(B5D),SD = sd(B5D),Count = n())
print(B5D_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      4.32 0.957   356
## 2 2009      4.32 1.03    368
## 3 2015      4.31 1.05    254

B5D_filtered$predicted_B5D <- predict(B5D_Anova)
B5D_filtered$year_factor_B5D <- factor(B5D_filtered$year)

ggplot(B5D_filtered, aes(x = Age_18_graduate, y = B5D, color = year_factor_B5D)) + geom_point(aes(shape = year_factor_B5D), alpha = 0.5) + geom_line(aes(y = predicted_B5D), size = 1) +  labs(title = "Managing own money (B5D) by Age and Year",x = "Age", y = "Managing own money (B5D)", color = "Year", shape = "Year") + theme_minimal()

ggplot(B5D_filtered, aes(x = year_factor_B5D, y = B5D, fill = year_factor_B5D)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Managing own money (B5D) in 2005 and 2015",x = "Year", y = "Managing own money (B5D)", fill = "Year") + theme_minimal()

B6C: Money management skills

TAS_filtered_age %>% count(B6C)

## # A tibble: 7 × 2
##     B6C     n
##   <dbl> <int>
## 1     1     9
## 2     2    23
## 3     3    52
## 4     4   122
## 5     5   276
## 6     6   239
## 7     7   258

B6C_filtered <- TAS_filtered_age %>% filter(B6C < 8) %>% filter(B6C >0)

B6C_Anova <- aov(B6C ~ year_new, data = B6C_filtered)
summary(B6C_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    4.6   2.297   1.271  0.281
## Residuals   976 1763.8   1.807

B6C_pairwise <-TukeyHSD(B6C_Anova)
B6C_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = B6C ~ year_new, data = B6C_filtered)
## 
## $year_new
##                  diff        lwr       upr     p adj
## 2009-2005  0.12987577 -0.1045438 0.3642953 0.3952346
## 2015-2005 -0.02430771 -0.2834785 0.2348631 0.9736358
## 2015-2009 -0.15418347 -0.4114462 0.1030793 0.3377514

B6C_glht <- glht(B6C_Anova, linfct = mcp(year_new = "Tukey")) 
summary(B6C_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = B6C ~ year_new, data = B6C_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0  0.12988    0.09987   1.300    0.395
## 2015 - 2005 == 0 -0.02431    0.11041  -0.220    0.974
## 2015 - 2009 == 0 -0.15418    0.10960  -1.407    0.337
## (Adjusted p values reported -- single-step method)

B6C_pairwise_t_test <- pairwise.t.test(B6C_filtered$B6C, B6C_filtered$year_new, p.adjust.method = "BH")
print(B6C_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  B6C_filtered$B6C and B6C_filtered$year_new 
## 
##      2005 2009
## 2009 0.29 -   
## 2015 0.83 0.29
## 
## P value adjustment method: BH

B6C_means <- B6C_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(B6C),SD = sd(B6C),Count = n())
print(B6C_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      5.39  1.43   356
## 2 2009      5.52  1.23   369
## 3 2015      5.37  1.37   254

B6C_filtered$predicted_B6C <- predict(B6C_Anova)
B6C_filtered$year_factor_B6C <- factor(B6C_filtered$year)

ggplot(B6C_filtered, aes(x = Age_18_graduate, y = B6C, color = year_factor_B6C)) + geom_point(aes(shape = year_factor_B6C), alpha = 0.5) + geom_line(aes(y = predicted_B6C), size = 1) +  labs(title = "Money management skills (B6C) by Age and Year",x = "Age", y = "Money management skills (B6C)", color = "Year", shape = "Year") + theme_minimal()

ggplot(B6C_filtered, aes(x = year_factor_B6C, y = B6C, fill = year_factor_B6C)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Money management skills (B6C) in 2005 and 2015",x = "Year", y = "Money management skills (B6C)", fill = "Year") + theme_minimal()

C2D: Worry about expenses

TAS_filtered_age %>% count(C2D)

## # A tibble: 7 × 2
##     C2D     n
##   <dbl> <int>
## 1     1   165
## 2     2   158
## 3     3   159
## 4     4   171
## 5     5   144
## 6     6    84
## 7     7    98

C2D_filtered <- TAS_filtered_age %>% filter(C2D < 8) %>% filter(C2D >0)

C2D_Anova <- aov(C2D ~ year_new, data = C2D_filtered)
summary(C2D_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2      8   4.105   1.146  0.318
## Residuals   976   3494   3.580

C2D_pairwise <-TukeyHSD(C2D_Anova)
C2D_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = C2D ~ year_new, data = C2D_filtered)
## 
## $year_new
##                  diff        lwr       upr     p adj
## 2009-2005 -0.04100058 -0.3709610 0.2889599 0.9541902
## 2015-2005 -0.22586924 -0.5906686 0.1389302 0.3140844
## 2015-2009 -0.18486866 -0.5469824 0.1772451 0.4544183

C2D_glht <- glht(C2D_Anova, linfct = mcp(year_new = "Tukey")) 
summary(C2D_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = C2D ~ year_new, data = C2D_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0  -0.0410     0.1406  -0.292    0.954
## 2015 - 2005 == 0  -0.2259     0.1554  -1.453    0.314
## 2015 - 2009 == 0  -0.1849     0.1543  -1.198    0.454
## (Adjusted p values reported -- single-step method)

C2D_pairwise_t_test <- pairwise.t.test(C2D_filtered$C2D, C2D_filtered$year_new, p.adjust.method = "BH")
print(C2D_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  C2D_filtered$C2D and C2D_filtered$year_new 
## 
##      2005 2009
## 2009 0.77 -   
## 2015 0.35 0.35
## 
## P value adjustment method: BH

C2D_means <- C2D_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(C2D),SD = sd(C2D),Count = n())
print(C2D_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      3.70  1.88   356
## 2 2009      3.66  1.93   369
## 3 2015      3.48  1.85   254

C2D_filtered$predicted_C2D <- predict(C2D_Anova)
C2D_filtered$year_factor_C2D <- factor(C2D_filtered$year)

ggplot(C2D_filtered, aes(x = Age_18_graduate, y = C2D, color = year_factor_C2D)) + geom_point(aes(shape = year_factor_C2D), alpha = 0.5) + geom_line(aes(y = predicted_C2D), size = 1) +  labs(title = "Worry about expense (C2D) by Age and Year",x = "Age", y = "Worry about expense (C2D)", color = "Year", shape = "Year") + theme_minimal()

ggplot(C2D_filtered, aes(x = year_factor_C2D, y = C2D, fill = year_factor_C2D)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Worry about expense (C2D) in 2005 and 2015",x = "Year", y = "Worry about expense (C2D)", fill = "Year") + theme_minimal()

C2E: Worry about future job

TAS_filtered_age %>% count(C2E)

## # A tibble: 7 × 2
##     C2E     n
##   <dbl> <int>
## 1     1   174
## 2     2   175
## 3     3   158
## 4     4   139
## 5     5   133
## 6     6    88
## 7     7   112

C2E_filtered <- TAS_filtered_age %>% filter(C2E < 8) %>% filter(C2E >0)

C2E_Anova <- aov(C2E ~ year_new, data = C2E_filtered)
summary(C2E_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2      1   0.433   0.112  0.894
## Residuals   976   3765   3.857

C2E_pairwise <-TukeyHSD(C2E_Anova)
C2E_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = C2E ~ year_new, data = C2E_filtered)
## 
## $year_new
##                  diff        lwr       upr     p adj
## 2009-2005 0.061310557 -0.2811726 0.4037937 0.9072802
## 2015-2005 0.062505530 -0.3161388 0.4411499 0.9205857
## 2015-2009 0.001194973 -0.3746618 0.3770518 0.9999693

C2E_glht <- glht(C2E_Anova, linfct = mcp(year_new = "Tukey")) 
summary(C2E_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = C2E ~ year_new, data = C2E_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.061311   0.145906   0.420    0.907
## 2015 - 2005 == 0 0.062506   0.161311   0.387    0.920
## 2015 - 2009 == 0 0.001195   0.160124   0.007    1.000
## (Adjusted p values reported -- single-step method)

C2E_pairwise_t_test <- pairwise.t.test(C2E_filtered$C2E, C2E_filtered$year_new, p.adjust.method = "BH")
print(C2E_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  C2E_filtered$C2E and C2E_filtered$year_new 
## 
##      2005 2009
## 2009 0.99 -   
## 2015 0.99 0.99
## 
## P value adjustment method: BH

C2E_means <- C2E_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(C2E),SD = sd(C2E),Count = n())
print(C2E_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      3.57  1.95   356
## 2 2009      3.63  1.97   369
## 3 2015      3.63  1.98   254

C2E_filtered$predicted_C2E <- predict(C2E_Anova)
C2E_filtered$year_factor_C2E <- factor(C2E_filtered$year)

ggplot(C2E_filtered, aes(x = Age_18_graduate, y = C2E, color = year_factor_C2E)) + geom_point(aes(shape = year_factor_C2E), alpha = 0.5) + geom_line(aes(y = predicted_C2E), size = 1) +  labs(title = "Worry about future job (C2E) by Age and Year",x = "Age", y = "Worry about future job (C2E)", color = "Year", shape = "Year") + theme_minimal()

ggplot(C2E_filtered, aes(x = year_factor_C2E, y = C2E, fill = year_factor_C2E)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Worry about future job (C2E) in 2005 and 2015",x = "Year", y = "Worry about future job (C2E)", fill = "Year") + theme_minimal()

C2F: Discouraged about future

TAS_filtered_age %>% count(C2F)

## # A tibble: 7 × 2
##     C2F     n
##   <dbl> <int>
## 1     1   212
## 2     2   239
## 3     3   164
## 4     4   150
## 5     5   106
## 6     6    54
## 7     7    54

C2F_filtered <- TAS_filtered_age %>% filter(C2F < 8) %>% filter(C2F >0)

C2F_Anova <- aov(C2F ~ year_new, data = C2F_filtered)
summary(C2F_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    2.6   1.304   0.424  0.655
## Residuals   976 3002.3   3.076

C2F_pairwise <-TukeyHSD(C2F_Anova)
C2F_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = C2F ~ year_new, data = C2F_filtered)
## 
## $year_new
##                  diff        lwr       upr     p adj
## 2009-2005  0.11554155 -0.1903038 0.4213869 0.6488063
## 2015-2005  0.02702822 -0.3111099 0.3651663 0.9807805
## 2015-2009 -0.08851333 -0.4241621 0.2471354 0.8097490

C2F_glht <- glht(C2F_Anova, linfct = mcp(year_new = "Tukey")) 
summary(C2F_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = C2F ~ year_new, data = C2F_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0  0.11554    0.13030   0.887    0.648
## 2015 - 2005 == 0  0.02703    0.14405   0.188    0.981
## 2015 - 2009 == 0 -0.08851    0.14299  -0.619    0.809
## (Adjusted p values reported -- single-step method)

C2F_pairwise_t_test <- pairwise.t.test(C2F_filtered$C2F, C2F_filtered$year_new, p.adjust.method = "BH")
print(C2F_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  C2F_filtered$C2F and C2F_filtered$year_new 
## 
##      2005 2009
## 2009 0.80 -   
## 2015 0.85 0.80
## 
## P value adjustment method: BH

C2F_means <- C2F_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(C2F),SD = sd(C2F),Count = n())
print(C2F_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      3.03  1.75   356
## 2 2009      3.14  1.77   369
## 3 2015      3.06  1.74   254

C2F_filtered$predicted_C2F <- predict(C2F_Anova)
C2F_filtered$year_factor_C2F <- factor(C2F_filtered$year)

ggplot(C2F_filtered, aes(x = Age_18_graduate, y = C2F, color = year_factor_C2F)) + geom_point(aes(shape = year_factor_C2F), alpha = 0.5) + geom_line(aes(y = predicted_C2F), size = 1) +  labs(title = "Responsibility for Self (C2F) by Age and Year",x = "Age", y = "Responsibility for Self (C2F)", color = "Year", shape = "Year") + theme_minimal()

ggplot(C2F_filtered, aes(x = year_factor_C2F, y = C2F, fill = year_factor_C2F)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Responsibility for Self (C2F) in 2005 and 2015",x = "Year", y = "Responsibility for Self (C2F)", fill = "Year") + theme_minimal()

G30A: Likelihood of well-paying job

TAS_filtered_age %>% count(G30A)

## # A tibble: 7 × 2
##    G30A     n
##   <dbl> <int>
## 1     0    36
## 2     1     3
## 3     3     5
## 4     4    52
## 5     5   178
## 6     6   333
## 7     7   372

G30A_filtered <- TAS_filtered_age %>% filter(G30A < 8) %>% filter(G30A >0)

G30A_Anova <- aov(G30A ~ year_new, data = G30A_filtered)
summary(G30A_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    0.3  0.1491    0.16  0.852
## Residuals   940  873.9  0.9297

G30A_pairwise <-TukeyHSD(G30A_Anova)
G30A_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = G30A ~ year_new, data = G30A_filtered)
## 
## $year_new
##                  diff        lwr       upr     p adj
## 2009-2005 -0.01642954 -0.1893221 0.1564630 0.9729423
## 2015-2005  0.02805118 -0.1621525 0.2182549 0.9360776
## 2015-2009  0.04448072 -0.1400501 0.2290115 0.8383074

G30A_glht <- glht(G30A_Anova, linfct = mcp(year_new = "Tukey")) 
summary(G30A_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = G30A ~ year_new, data = G30A_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.01643    0.07365  -0.223    0.973
## 2015 - 2005 == 0  0.02805    0.08103   0.346    0.936
## 2015 - 2009 == 0  0.04448    0.07861   0.566    0.838
## (Adjusted p values reported -- single-step method)

G30A_pairwise_t_test <- pairwise.t.test(G30A_filtered$G30A, G30A_filtered$year_new, p.adjust.method = "BH")
print(G30A_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  G30A_filtered$G30A and G30A_filtered$year_new 
## 
##      2005 2009
## 2009 0.82 -   
## 2015 0.82 0.82
## 
## P value adjustment method: BH

G30A_means <- G30A_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(G30A),SD = sd(G30A),Count = n())
print(G30A_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      6.06 0.948   320
## 2 2009      6.05 1.00    369
## 3 2015      6.09 0.930   254

G30A_filtered$predicted_G30A <- predict(G30A_Anova)
G30A_filtered$year_factor_G30A <- factor(G30A_filtered$year)

ggplot(G30A_filtered, aes(x = Age_18_graduate, y = G30A, color = year_factor_G30A)) + geom_point(aes(shape = year_factor_G30A), alpha = 0.5) + geom_line(aes(y = predicted_G30A), size = 1) +  labs(title = "Likelihood of well-paying job (G30A) by Age and Year",x = "Age", y = "Likelihood of well-paying job (G30A)", color = "Year", shape = "Year") + theme_minimal()

ggplot(G30A_filtered, aes(x = year_factor_G30A, y = G30A, fill = year_factor_G30A)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Likelihood of well-paying job (G30A) in 2005 and 2015",x = "Year", y = "Likelihood of well-paying job (G30A)", fill = "Year") + theme_minimal()

G41A: Importance of job status

TAS_filtered_age %>% count(G41A)

## # A tibble: 8 × 2
##    G41A     n
##   <dbl> <int>
## 1     1    38
## 2     2    36
## 3     3    76
## 4     4   116
## 5     5   239
## 6     6   199
## 7     7   274
## 8     9     1

G41A_filtered <- TAS_filtered_age %>% filter(G41A < 8) %>% filter(G41A >0)

G41A_Anova <- aov(G41A ~ year_new, data = G41A_filtered)
summary(G41A_Anova)

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## year_new      2   42.2  21.091   8.046 0.000342 ***
## Residuals   975 2555.8   2.621                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

G41A_pairwise <-TukeyHSD(G41A_Anova)
G41A_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = G41A ~ year_new, data = G41A_filtered)
## 
## $year_new
##                  diff        lwr        upr     p adj
## 2009-2005 -0.03083781 -0.3133562  0.2516806 0.9644608
## 2015-2005 -0.48834380 -0.8004838 -0.1762038 0.0007404
## 2015-2009 -0.45750599 -0.7675196 -0.1474923 0.0016083

G41A_glht <- glht(G41A_Anova, linfct = mcp(year_new = "Tukey")) 
summary(G41A_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = G41A ~ year_new, data = G41A_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)    
## 2009 - 2005 == 0 -0.03084    0.12036  -0.256 0.964399    
## 2015 - 2005 == 0 -0.48834    0.13298  -3.672 0.000721 ***
## 2015 - 2009 == 0 -0.45751    0.13207  -3.464 0.001589 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

G41A_pairwise_t_test <- pairwise.t.test(G41A_filtered$G41A, G41A_filtered$year_new, p.adjust.method = "BH")
print(G41A_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  G41A_filtered$G41A and G41A_filtered$year_new 
## 
##      2005    2009   
## 2009 0.79784 -      
## 2015 0.00076 0.00083
## 
## P value adjustment method: BH

G41A_means <- G41A_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(G41A),SD = sd(G41A),Count = n())
print(G41A_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      5.36  1.63   356
## 2 2009      5.33  1.53   368
## 3 2015      4.87  1.73   254

G41A_filtered$predicted_G41A <- predict(G41A_Anova)
G41A_filtered$year_factor_G41A <- factor(G41A_filtered$year)

ggplot(G41A_filtered, aes(x = Age_18_graduate, y = G41A, color = year_factor_G41A)) + geom_point(aes(shape = year_factor_G41A), alpha = 0.5) + geom_line(aes(y = predicted_G41A), size = 1) +  labs(title = "Importance of job status (G41A) by Age and Year",x = "Age", y = "Importance of job status (G41A)", color = "Year", shape = "Year") + theme_minimal()

ggplot(G41A_filtered, aes(x = year_factor_G41A, y = G41A, fill = year_factor_G41A)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of job status (G41A) in 2005 and 2015",x = "Year", y = "Importance of job status (G41A)", fill = "Year") + theme_minimal()

G41B: Importance of decision-making

TAS_filtered_age %>% count(G41B)

## # A tibble: 7 × 2
##    G41B     n
##   <dbl> <int>
## 1     1     9
## 2     2     8
## 3     3    27
## 4     4    81
## 5     5   261
## 6     6   300
## 7     7   293

G41B_filtered <- TAS_filtered_age %>% filter(G41B < 8) %>% filter(G41B >0)

G41B_Anova <- aov(G41B ~ year_new, data = G41B_filtered)
summary(G41B_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)  
## year_new      2    7.6   3.779    2.67 0.0698 .
## Residuals   976 1381.7   1.416                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

G41B_pairwise <-TukeyHSD(G41B_Anova)
G41B_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = G41B ~ year_new, data = G41B_filtered)
## 
## $year_new
##                  diff        lwr         upr     p adj
## 2009-2005  0.05566974 -0.1518130 0.263152525 0.8037614
## 2015-2005 -0.16453596 -0.3939259 0.064853942 0.2119589
## 2015-2009 -0.22020571 -0.4479069 0.007495445 0.0605596

G41B_glht <- glht(G41B_Anova, linfct = mcp(year_new = "Tukey")) 
summary(G41B_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = G41B ~ year_new, data = G41B_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)  
## 2009 - 2005 == 0  0.05567    0.08839   0.630   0.8035  
## 2015 - 2005 == 0 -0.16454    0.09773  -1.684   0.2116  
## 2015 - 2009 == 0 -0.22021    0.09701  -2.270   0.0604 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

G41B_pairwise_t_test <- pairwise.t.test(G41B_filtered$G41B, G41B_filtered$year_new, p.adjust.method = "BH")
print(G41B_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  G41B_filtered$G41B and G41B_filtered$year_new 
## 
##      2005 2009
## 2009 0.53 -   
## 2015 0.14 0.07
## 
## P value adjustment method: BH

G41B_means <- G41B_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(G41B),SD = sd(G41B),Count = n())
print(G41B_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      5.73  1.09   356
## 2 2009      5.78  1.20   369
## 3 2015      5.56  1.31   254

G41B_filtered$predicted_G41B <- predict(G41B_Anova)
G41B_filtered$year_factor_G41B <- factor(G41B_filtered$year)

ggplot(G41B_filtered, aes(x = Age_18_graduate, y = G41B, color = year_factor_G41B)) + geom_point(aes(shape = year_factor_G41B), alpha = 0.5) + geom_line(aes(y = predicted_G41B), size = 1) +  labs(title = "Importance of decision-making (G41B) by Age and Year",x = "Age", y = "Importance of decision-making (G41B)", color = "Year", shape = "Year") + theme_minimal()

ggplot(G41B_filtered, aes(x = year_factor_G41B, y = G41B, fill = year_factor_G41B)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of decision-making (G41B) in 2005 and 2015",x = "Year", y = "Importance of decision-making (G41B)", fill = "Year") + theme_minimal()

G41C: Importance of challenging work

TAS_filtered_age %>% count(G41C)

## # A tibble: 8 × 2
##    G41C     n
##   <dbl> <int>
## 1     0     1
## 2     1     5
## 3     2    11
## 4     3    35
## 5     4   120
## 6     5   292
## 7     6   286
## 8     7   229

G41C_filtered <- TAS_filtered_age %>% filter(G41C < 8) %>% filter(G41C >0)

G41C_Anova <- aov(G41C ~ year_new, data = G41C_filtered)
summary(G41C_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    2.6   1.286   0.908  0.404
## Residuals   975 1381.8   1.417

G41C_pairwise <-TukeyHSD(G41C_Anova)
G41C_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = G41C ~ year_new, data = G41C_filtered)
## 
## $year_new
##                  diff        lwr        upr     p adj
## 2009-2005  0.04131269 -0.1662814 0.24890681 0.8867127
## 2015-2005 -0.08883288 -0.3186104 0.14094468 0.6356998
## 2015-2009 -0.13014557 -0.3582354 0.09794429 0.3736893

G41C_glht <- glht(G41C_Anova, linfct = mcp(year_new = "Tukey")) 
summary(G41C_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = G41C ~ year_new, data = G41C_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0  0.04131    0.08844   0.467    0.887
## 2015 - 2005 == 0 -0.08883    0.09789  -0.907    0.635
## 2015 - 2009 == 0 -0.13015    0.09717  -1.339    0.373
## (Adjusted p values reported -- single-step method)

G41C_pairwise_t_test <- pairwise.t.test(G41C_filtered$G41C, G41C_filtered$year_new, p.adjust.method = "BH")
print(G41C_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  G41C_filtered$G41C and G41C_filtered$year_new 
## 
##      2005 2009
## 2009 0.64 -   
## 2015 0.55 0.54
## 
## P value adjustment method: BH

G41C_means <- G41C_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(G41C),SD = sd(G41C),Count = n())
print(G41C_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      5.52  1.11   356
## 2 2009      5.56  1.21   369
## 3 2015      5.43  1.27   253

G41C_filtered$predicted_G41C <- predict(G41C_Anova)
G41C_filtered$year_factor_G41C <- factor(G41C_filtered$year)

ggplot(G41C_filtered, aes(x = Age_18_graduate, y = G41C, color = year_factor_G41C)) + geom_point(aes(shape = year_factor_G41C), alpha = 0.5) + geom_line(aes(y = predicted_G41C), size = 1) +  labs(title = "Importance of challenging work (G41C) by Age and Year",x = "Age", y = "Importance of challenging work (G41C)", color = "Year", shape = "Year") + theme_minimal()

ggplot(G41C_filtered, aes(x = year_factor_G41C, y = G41C, fill = year_factor_G41C)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of challenging work (G41C) in 2005 and 2015",x = "Year", y = "Importance of challenging work (G41C)", fill = "Year") + theme_minimal()

G41H: Importance of healthcare benefits

TAS_filtered_age %>% count(G41H)

## # A tibble: 7 × 2
##    G41H     n
##   <dbl> <int>
## 1     1     3
## 2     2     5
## 3     3     9
## 4     4    31
## 5     5   111
## 6     6   229
## 7     7   591

G41H_filtered <- TAS_filtered_age %>% filter(G41H < 8) %>% filter(G41H >0)

G41H_Anova <- aov(G41H ~ year_new, data = G41H_filtered)
summary(G41H_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)  
## year_new      2    7.0   3.493   3.683 0.0255 *
## Residuals   976  925.6   0.948                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

G41H_pairwise <-TukeyHSD(G41H_Anova)
G41H_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = G41H ~ year_new, data = G41H_filtered)
## 
## $year_new
##                 diff        lwr         upr     p adj
## 2009-2005  0.0431549 -0.1266595  0.21296929 0.8220175
## 2015-2005 -0.1660400 -0.3537843  0.02170430 0.0953807
## 2015-2009 -0.2091949 -0.3955570 -0.02283276 0.0232321

G41H_glht <- glht(G41H_Anova, linfct = mcp(year_new = "Tukey")) 
summary(G41H_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = G41H ~ year_new, data = G41H_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)  
## 2009 - 2005 == 0  0.04315    0.07234   0.597   0.8217  
## 2015 - 2005 == 0 -0.16604    0.07998  -2.076   0.0951 .
## 2015 - 2009 == 0 -0.20919    0.07939  -2.635   0.0232 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

G41H_pairwise_t_test <- pairwise.t.test(G41H_filtered$G41H, G41H_filtered$year_new, p.adjust.method = "BH")
print(G41H_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  G41H_filtered$G41H and G41H_filtered$year_new 
## 
##      2005  2009 
## 2009 0.551 -    
## 2015 0.057 0.026
## 
## P value adjustment method: BH

G41H_means <- G41H_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(G41H),SD = sd(G41H),Count = n())
print(G41H_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      6.39 0.939   356
## 2 2009      6.43 0.965   369
## 3 2015      6.22 1.03    254

G41H_filtered$predicted_G41H <- predict(G41H_Anova)
G41H_filtered$year_factor_G41H <- factor(G41H_filtered$year)

ggplot(G41H_filtered, aes(x = Age_18_graduate, y = G41H, color = year_factor_G41H)) + geom_point(aes(shape = year_factor_G41H), alpha = 0.5) + geom_line(aes(y = predicted_G41H), size = 1) +  labs(title = "Importance of healthcare benefits (G41H) by Age and Year",x = "Age", y = "Importance of healthcare benefits (G41H)", color = "Year", shape = "Year") + theme_minimal()

ggplot(G41H_filtered, aes(x = year_factor_G41H, y = G41H, fill = year_factor_G41H)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of healthcare benefits (G41H) in 2005 and 2015",x = "Year", y = "Importance of healthcare benefits (G41H)", fill = "Year") + theme_minimal()

G41P: Importance of job central to identity

TAS_filtered_age %>% count(G41P)

## # A tibble: 9 × 2
##    G41P     n
##   <dbl> <int>
## 1     1    31
## 2     2    48
## 3     3    71
## 4     4   174
## 5     5   245
## 6     6   205
## 7     7   202
## 8     8     2
## 9     9     1

G41P_filtered <- TAS_filtered_age %>% filter(G41P < 8) %>% filter(G41P >0)

G41P_Anova <- aov(G41P ~ year_new, data = G41P_filtered)
summary(G41P_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    8.1   4.043   1.646  0.193
## Residuals   973 2390.3   2.457

G41P_pairwise <-TukeyHSD(G41P_Anova)
G41P_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = G41P ~ year_new, data = G41P_filtered)
## 
## $year_new
##                 diff        lwr        upr     p adj
## 2009-2005  0.1167978 -0.1570803 0.39067590 0.5763770
## 2015-2005 -0.1136298 -0.4159823 0.18872267 0.6517076
## 2015-2009 -0.2304276 -0.5307114 0.06985616 0.1696895

G41P_glht <- glht(G41P_Anova, linfct = mcp(year_new = "Tukey")) 
summary(G41P_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = G41P ~ year_new, data = G41P_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0   0.1168     0.1167   1.001    0.576
## 2015 - 2005 == 0  -0.1136     0.1288  -0.882    0.651
## 2015 - 2009 == 0  -0.2304     0.1279  -1.801    0.169
## (Adjusted p values reported -- single-step method)

G41P_pairwise_t_test <- pairwise.t.test(G41P_filtered$G41P, G41P_filtered$year_new, p.adjust.method = "BH")
print(G41P_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  G41P_filtered$G41P and G41P_filtered$year_new 
## 
##      2005 2009
## 2009 0.38 -   
## 2015 0.38 0.22
## 
## P value adjustment method: BH

G41P_means <- G41P_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(G41P),SD = sd(G41P),Count = n())
print(G41P_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      5.01  1.54   355
## 2 2009      5.13  1.53   367
## 3 2015      4.90  1.65   254

G41P_filtered$predicted_G41P <- predict(G41P_Anova)
G41P_filtered$year_factor_G41P <- factor(G41P_filtered$year)

ggplot(G41P_filtered, aes(x = Age_18_graduate, y = G41P, color = year_factor_G41P)) + geom_point(aes(shape = year_factor_G41P), alpha = 0.5) + geom_line(aes(y = predicted_G41P), size = 1) +  labs(title = "Importance of job central to identity (G41P) by Age and Year",x = "Age", y = "Importance of job central to identity (G41P)", color = "Year", shape = "Year") + theme_minimal()

ggplot(G41P_filtered, aes(x = year_factor_G41P, y = G41P, fill = year_factor_G41P)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of job central to identity (G41P) in 2005 and 2015",x = "Year", y = "Importance of job central to identity (G41P)", fill = "Year") + theme_minimal()

H1: General Health

TAS_filtered_age %>% count(H1)

## # A tibble: 5 × 2
##      H1     n
##   <dbl> <int>
## 1     1   264
## 2     2   411
## 3     3   229
## 4     4    70
## 5     5     5

H1_filtered <- TAS_filtered_age %>% filter(H1 < 8) %>% filter(H1 >0)

H1_Anova <- aov(H1 ~ year_new, data = H1_filtered)
summary(H1_Anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## year_new      2    1.0  0.4774   0.581   0.56
## Residuals   976  802.3  0.8221

H1_pairwise <-TukeyHSD(H1_Anova)
H1_pairwise

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = H1 ~ year_new, data = H1_filtered)
## 
## $year_new
##                  diff         lwr       upr     p adj
## 2009-2005 -0.04249262 -0.20059941 0.1156142 0.8031744
## 2015-2005  0.03614085 -0.13865969 0.2109414 0.8782957
## 2015-2009  0.07863346 -0.09488021 0.2521471 0.5368712

H1_glht <- glht(H1_Anova, linfct = mcp(year_new = "Tukey")) 
summary(H1_glht)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = H1 ~ year_new, data = H1_filtered)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.04249    0.06736  -0.631    0.803
## 2015 - 2005 == 0  0.03614    0.07447   0.485    0.878
## 2015 - 2009 == 0  0.07863    0.07392   1.064    0.536
## (Adjusted p values reported -- single-step method)

H1_pairwise_t_test <- pairwise.t.test(H1_filtered$H1, H1_filtered$year_new, p.adjust.method = "BH")
print(H1_pairwise_t_test)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  H1_filtered$H1 and H1_filtered$year_new 
## 
##      2005 2009
## 2009 0.63 -   
## 2015 0.63 0.63
## 
## P value adjustment method: BH

H1_means <- H1_filtered %>%  group_by(year_new) %>% summarise(Mean = mean(H1),SD = sd(H1),Count = n())
print(H1_means)

## # A tibble: 3 × 4
##   year_new  Mean    SD Count
##   <fct>    <dbl> <dbl> <int>
## 1 2005      2.13 0.907   356
## 2 2009      2.09 0.880   369
## 3 2015      2.17 0.943   254

H1_filtered$predicted_H1 <- predict(H1_Anova)
H1_filtered$year_factor_H1 <- factor(H1_filtered$year)

ggplot(H1_filtered, aes(x = Age_18_graduate, y = H1, color = year_factor_H1)) + geom_point(aes(shape = year_factor_H1), alpha = 0.5) + geom_line(aes(y = predicted_H1), size = 1) +  labs(title = "General Health (H1) by Age and Year",x = "Age", y = "General Health (H1)", color = "Year", shape = "Year") + theme_minimal()

ggplot(H1_filtered, aes(x = year_factor_H1, y = H1, fill = year_factor_H1)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of General Health (H1) in 2005 and 2015",x = "Year", y = "General Health (H1)", fill = "Year") + theme_minimal()

TAS_Anova

Sher May

2024-11-21

TAS Descriptive statistics

Step 1: Loading Packages

Step 2: Import the data

Step 3: Preview the data

B5A: Responsibility for self

B5D: Managing own money

B6C: Money management skills

C2D: Worry about expenses

C2E: Worry about future job

C2F: Discouraged about future

G30A: Likelihood of well-paying job

G41A: Importance of job status

G41B: Importance of decision-making

G41C: Importance of challenging work

G41H: Importance of healthcare benefits

G41P: Importance of job central to identity

H1: General Health