We will be going through
library(tidyverse)
library(readxl)
library(ggplot2)
library (reshape2)
library(writexl)
library (lmerTest)
library(lme4)
library(dplyr)
library(ggpubr)
library(rstatix)
library(effectsize)
library(multcomp)
TAS_data_long_format_age <- read_excel("TAS_data_long_format_age.xlsx") %>% unite("TAS_ID", c("1968 Interview Number", "Person Number")) %>% mutate(year_new = case_when(year == 2005 ~ -1, year == 2009 ~ 0,year == 2015 ~ 1))
view(TAS_data_long_format_age)
head(TAS_data_long_format_age)
## # A tibble: 6 × 42
## TAS TAS05 TAS09 TAS15 TAS_ID Gender `Individual is sample` `Year ID Number`
## <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2 1 1 NA 4_180 2 3 771
## 2 2 1 1 NA 5_32 2 2 624
## 3 2 1 1 NA 6_34 1 2 1202
## 4 2 1 1 NA 14_30 1 2 736
## 5 1 1 NA NA 18_38 2 2 5647
## 6 2 1 1 NA 47_34 2 2 2516
## # ℹ 34 more variables: `Sequence Number` <dbl>, `Relationship to Head` <dbl>,
## # `Release Number` <dbl>, B5A <dbl>, B5D <dbl>, B6C <dbl>, C2D <dbl>,
## # C2E <dbl>, C2F <dbl>, D2D3_month <dbl>, D2D3_year <dbl>,
## # E1_1st_mention <dbl>, E1_2nd_mention <dbl>, E1_3rd_mention <dbl>, E3 <dbl>,
## # G1 <dbl>, G2_month <dbl>, G2_year <dbl>, G10 <dbl>, G11 <dbl>, G30A <dbl>,
## # G41A <dbl>, G41B <dbl>, G41C <dbl>, G41H <dbl>, G41P <dbl>, H1 <dbl>,
## # L7_1st_mention <dbl>, L7_2nd_mention <dbl>, L7_3rd_mention <dbl>, …
TAS_data_long_format_age %>% count(year)
## # A tibble: 3 × 2
## year n
## <dbl> <int>
## 1 2005 745
## 2 2009 1554
## 3 2015 1641
TAS_data_long_format_age %>% count(Age_18_graduate)
## # A tibble: 16 × 2
## Age_18_graduate n
## <dbl> <int>
## 1 18 465
## 2 19 504
## 3 20 366
## 4 21 287
## 5 22 331
## 6 23 256
## 7 24 208
## 8 25 121
## 9 26 33
## 10 27 16
## 11 28 1
## 12 29 1
## 13 32 1
## 14 2023 146
## 15 2027 264
## 16 2033 940
TAS_data_long_format_age %>% group_by(year) %>% count(Age_18_graduate) %>% ungroup()
## # A tibble: 32 × 3
## year Age_18_graduate n
## <dbl> <dbl> <int>
## 1 2005 18 179
## 2 2005 19 169
## 3 2005 20 164
## 4 2005 21 83
## 5 2005 22 2
## 6 2005 23 2
## 7 2005 2023 146
## 8 2009 18 195
## 9 2009 19 172
## 10 2009 20 166
## # ℹ 22 more rows
#One-Way ANOVA
TAS_filtered_age <- TAS_data_long_format_age %>% group_by(TAS_ID) %>% mutate(Age_18_graduate = case_when(year == 2005 & Age_18_graduate == 2023 & any(year == 2009) ~ first(Age_18_graduate[year == 2009]) - 4, year == 2009 & Age_18_graduate == 2027 & any(year == 2005) ~ first(Age_18_graduate[year == 2005]) + 4, year == 2009 & Age_18_graduate == 2027 & any(year == 2015) ~ first(Age_18_graduate[year == 2015]) - 6, year == 2015 & Age_18_graduate == 2033 & any(year == 2009) ~ first(Age_18_graduate[year == 2009]) + 6,Age_18_graduate >= 1 & Age_18_graduate <= 50 ~ Age_18_graduate, TRUE ~ Age_18_graduate)) %>% ungroup() %>% filter(Age_18_graduate == 18 | Age_18_graduate == 19) %>% mutate(year_new = factor(year_new, levels = c(-1, 0, 1), labels = c("2005", "2009", "2015")))
view(TAS_filtered_age)
knitr::kable(head(TAS_filtered_age[, 1:42]))
| TAS | TAS05 | TAS09 | TAS15 | TAS_ID | Gender | Individual is sample | Year ID Number | Sequence Number | Relationship to Head | Release Number | B5A | B5D | B6C | C2D | C2E | C2F | D2D3_month | D2D3_year | E1_1st_mention | E1_2nd_mention | E1_3rd_mention | E3 | G1 | G2_month | G2_year | G10 | G11 | G30A | G41A | G41B | G41C | G41H | G41P | H1 | L7_1st_mention | L7_2nd_mention | L7_3rd_mention | Age_17_graduate | Age_18_graduate | year | year_new |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | NA | NA | 18_38 | 2 | 2 | 5647 | 3 | 98 | 5 | 3 | 4 | 3 | 4 | 2 | 2 | 0 | 0 | 3 | 7 | 0 | 5 | 1 | 6 | 2004 | 1 | 5 | 6 | 5 | 5 | 5 | 7 | 5 | 2 | 1 | 0 | 0 | 18 | 19 | 2005 | 2005 |
| 2 | 1 | 1 | NA | 47_34 | 2 | 2 | 2516 | 3 | 30 | 5 | 4 | 5 | 6 | 4 | 5 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2005 | 5 | 0 | 6 | 3 | 6 | 4 | 7 | 4 | 1 | 1 | 0 | 0 | 17 | 18 | 2005 | 2005 |
| 2 | 1 | 1 | NA | 53_36 | 2 | 2 | 1616 | 3 | 30 | 5 | 4 | 5 | 7 | 4 | 1 | 1 | 0 | 0 | 6 | 0 | 0 | 1 | 1 | 6 | 2005 | 1 | 5 | 7 | 7 | 7 | 5 | 7 | 6 | 2 | 1 | 0 | 0 | 17 | 18 | 2005 | 2005 |
| 2 | 1 | 1 | NA | 79_32 | 2 | 2 | 6520 | 2 | 30 | 5 | 3 | 4 | 6 | 7 | 5 | 3 | 0 | 0 | 1 | 7 | 0 | 0 | 1 | 5 | 2004 | 1 | 1 | 0 | 7 | 7 | 6 | 7 | 4 | 1 | 1 | 0 | 0 | 18 | 19 | 2005 | 2005 |
| 2 | 1 | 1 | NA | 88_35 | 1 | 2 | 3411 | 2 | 30 | 5 | 2 | 5 | 7 | 3 | 1 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 2005 | 1 | 1 | 7 | 2 | 6 | 5 | 6 | 7 | 2 | 1 | 0 | 0 | 17 | 18 | 2005 | 2005 |
| 2 | 1 | 1 | NA | 89_34 | 2 | 2 | 4527 | 3 | 30 | 5 | 2 | 4 | 5 | 2 | 3 | 1 | 0 | 0 | 3 | 7 | 0 | 5 | 1 | 5 | 2005 | 1 | 1 | 7 | 5 | 6 | 4 | 7 | 5 | 1 | 1 | 0 | 0 | 17 | 18 | 2005 | 2005 |
TAS_filtered_age %>% group_by(year) %>% count(Age_18_graduate)
## # A tibble: 6 × 3
## # Groups: year [3]
## year Age_18_graduate n
## <dbl> <dbl> <int>
## 1 2005 18 185
## 2 2005 19 171
## 3 2009 18 196
## 4 2009 19 173
## 5 2015 18 91
## 6 2015 19 163
TAS_filtered_age %>% count(B5A)
## # A tibble: 6 × 2
## B5A n
## <dbl> <int>
## 1 1 48
## 2 2 202
## 3 3 243
## 4 4 318
## 5 5 167
## 6 9 1
B5A_filtered <- TAS_filtered_age %>% filter(B5A < 8) %>% filter(B5A >0)
B5A_Anova <- aov(B5A ~ year_new, data = B5A_filtered)
summary(B5A_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 4.7 2.336 1.826 0.162
## Residuals 975 1247.2 1.279
B5A_pairwise <-TukeyHSD(B5A_Anova)
B5A_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = B5A ~ year_new, data = B5A_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 -0.09379580 -0.29115264 0.1035610 0.5046582
## 2015-2005 0.08028842 -0.13776097 0.2983378 0.6629646
## 2015-2009 0.17408422 -0.04247977 0.3906482 0.1430000
B5A_glht <- glht(B5A_Anova, linfct = mcp(year_new = "Tukey"))
summary(B5A_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = B5A ~ year_new, data = B5A_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.09380 0.08408 -1.116 0.504
## 2015 - 2005 == 0 0.08029 0.09289 0.864 0.663
## 2015 - 2009 == 0 0.17408 0.09226 1.887 0.143
## (Adjusted p values reported -- single-step method)
B5A_pairwise_t_test <- pairwise.t.test(B5A_filtered$B5A, B5A_filtered$year_new, p.adjust.method = "BH")
print(B5A_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: B5A_filtered$B5A and B5A_filtered$year_new
##
## 2005 2009
## 2009 0.39 -
## 2015 0.39 0.18
##
## P value adjustment method: BH
B5A_means <- B5A_filtered %>% group_by(year_new) %>% summarise(Mean = mean(B5A),SD = sd(B5A),Count = n())
print(B5A_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 3.38 1.11 356
## 2 2009 3.28 1.15 368
## 3 2015 3.46 1.14 254
B5A_filtered$predicted_B5A <- predict(B5A_Anova)
B5A_filtered$year_factor_B5A <- factor(B5A_filtered$year)
ggplot(B5A_filtered, aes(x = Age_18_graduate, y = B5A, color = year_factor_B5A)) + geom_point(aes(shape = year_factor_B5A), alpha = 0.5) + geom_line(aes(y = predicted_B5A), size = 1) + labs(title = "Responsibility for Self (B5A) by Age and Year",x = "Age", y = "Responsibility for Self (B5A)", color = "Year", shape = "Year") + theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(B5A_filtered, aes(x = year_factor_B5A, y = B5A, fill = year_factor_B5A)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Responsibility for Self (B5A) in 2005 and 2015",x = "Year", y = "Responsibility for Self (B5A)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(B5D)
## # A tibble: 6 × 2
## B5D n
## <dbl> <int>
## 1 1 32
## 2 2 34
## 3 3 95
## 4 4 249
## 5 5 568
## 6 9 1
B5D_filtered <- TAS_filtered_age %>% filter(B5D < 8) %>% filter(B5D >0)
B5D_Anova <- aov(B5D ~ year_new, data = B5D_filtered)
summary(B5D_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 0.0 0.0042 0.004 0.996
## Residuals 975 989.4 1.0147
B5D_pairwise <-TukeyHSD(B5D_Anova)
B5D_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = B5D ~ year_new, data = B5D_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.0005190523 -0.1752583 0.1762964 0.9999735
## 2015-2005 -0.0063921083 -0.2005994 0.1878152 0.9967147
## 2015-2009 -0.0069111606 -0.1997955 0.1859732 0.9961078
B5D_glht <- glht(B5D_Anova, linfct = mcp(year_new = "Tukey"))
summary(B5D_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = B5D ~ year_new, data = B5D_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.0005191 0.0748852 0.007 1.000
## 2015 - 2005 == 0 -0.0063921 0.0827368 -0.077 0.997
## 2015 - 2009 == 0 -0.0069112 0.0821731 -0.084 0.996
## (Adjusted p values reported -- single-step method)
B5D_pairwise_t_test <- pairwise.t.test(B5D_filtered$B5D, B5D_filtered$year_new, p.adjust.method = "BH")
print(B5D_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: B5D_filtered$B5D and B5D_filtered$year_new
##
## 2005 2009
## 2009 0.99 -
## 2015 0.99 0.99
##
## P value adjustment method: BH
B5D_means <- B5D_filtered %>% group_by(year_new) %>% summarise(Mean = mean(B5D),SD = sd(B5D),Count = n())
print(B5D_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 4.32 0.957 356
## 2 2009 4.32 1.03 368
## 3 2015 4.31 1.05 254
B5D_filtered$predicted_B5D <- predict(B5D_Anova)
B5D_filtered$year_factor_B5D <- factor(B5D_filtered$year)
ggplot(B5D_filtered, aes(x = Age_18_graduate, y = B5D, color = year_factor_B5D)) + geom_point(aes(shape = year_factor_B5D), alpha = 0.5) + geom_line(aes(y = predicted_B5D), size = 1) + labs(title = "Managing own money (B5D) by Age and Year",x = "Age", y = "Managing own money (B5D)", color = "Year", shape = "Year") + theme_minimal()
ggplot(B5D_filtered, aes(x = year_factor_B5D, y = B5D, fill = year_factor_B5D)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Managing own money (B5D) in 2005 and 2015",x = "Year", y = "Managing own money (B5D)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(B6C)
## # A tibble: 7 × 2
## B6C n
## <dbl> <int>
## 1 1 9
## 2 2 23
## 3 3 52
## 4 4 122
## 5 5 276
## 6 6 239
## 7 7 258
B6C_filtered <- TAS_filtered_age %>% filter(B6C < 8) %>% filter(B6C >0)
B6C_Anova <- aov(B6C ~ year_new, data = B6C_filtered)
summary(B6C_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 4.6 2.297 1.271 0.281
## Residuals 976 1763.8 1.807
B6C_pairwise <-TukeyHSD(B6C_Anova)
B6C_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = B6C ~ year_new, data = B6C_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.12987577 -0.1045438 0.3642953 0.3952346
## 2015-2005 -0.02430771 -0.2834785 0.2348631 0.9736358
## 2015-2009 -0.15418347 -0.4114462 0.1030793 0.3377514
B6C_glht <- glht(B6C_Anova, linfct = mcp(year_new = "Tukey"))
summary(B6C_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = B6C ~ year_new, data = B6C_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.12988 0.09987 1.300 0.395
## 2015 - 2005 == 0 -0.02431 0.11041 -0.220 0.974
## 2015 - 2009 == 0 -0.15418 0.10960 -1.407 0.337
## (Adjusted p values reported -- single-step method)
B6C_pairwise_t_test <- pairwise.t.test(B6C_filtered$B6C, B6C_filtered$year_new, p.adjust.method = "BH")
print(B6C_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: B6C_filtered$B6C and B6C_filtered$year_new
##
## 2005 2009
## 2009 0.29 -
## 2015 0.83 0.29
##
## P value adjustment method: BH
B6C_means <- B6C_filtered %>% group_by(year_new) %>% summarise(Mean = mean(B6C),SD = sd(B6C),Count = n())
print(B6C_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 5.39 1.43 356
## 2 2009 5.52 1.23 369
## 3 2015 5.37 1.37 254
B6C_filtered$predicted_B6C <- predict(B6C_Anova)
B6C_filtered$year_factor_B6C <- factor(B6C_filtered$year)
ggplot(B6C_filtered, aes(x = Age_18_graduate, y = B6C, color = year_factor_B6C)) + geom_point(aes(shape = year_factor_B6C), alpha = 0.5) + geom_line(aes(y = predicted_B6C), size = 1) + labs(title = "Money management skills (B6C) by Age and Year",x = "Age", y = "Money management skills (B6C)", color = "Year", shape = "Year") + theme_minimal()
ggplot(B6C_filtered, aes(x = year_factor_B6C, y = B6C, fill = year_factor_B6C)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Money management skills (B6C) in 2005 and 2015",x = "Year", y = "Money management skills (B6C)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(C2D)
## # A tibble: 7 × 2
## C2D n
## <dbl> <int>
## 1 1 165
## 2 2 158
## 3 3 159
## 4 4 171
## 5 5 144
## 6 6 84
## 7 7 98
C2D_filtered <- TAS_filtered_age %>% filter(C2D < 8) %>% filter(C2D >0)
C2D_Anova <- aov(C2D ~ year_new, data = C2D_filtered)
summary(C2D_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 8 4.105 1.146 0.318
## Residuals 976 3494 3.580
C2D_pairwise <-TukeyHSD(C2D_Anova)
C2D_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = C2D ~ year_new, data = C2D_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 -0.04100058 -0.3709610 0.2889599 0.9541902
## 2015-2005 -0.22586924 -0.5906686 0.1389302 0.3140844
## 2015-2009 -0.18486866 -0.5469824 0.1772451 0.4544183
C2D_glht <- glht(C2D_Anova, linfct = mcp(year_new = "Tukey"))
summary(C2D_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = C2D ~ year_new, data = C2D_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.0410 0.1406 -0.292 0.954
## 2015 - 2005 == 0 -0.2259 0.1554 -1.453 0.314
## 2015 - 2009 == 0 -0.1849 0.1543 -1.198 0.454
## (Adjusted p values reported -- single-step method)
C2D_pairwise_t_test <- pairwise.t.test(C2D_filtered$C2D, C2D_filtered$year_new, p.adjust.method = "BH")
print(C2D_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: C2D_filtered$C2D and C2D_filtered$year_new
##
## 2005 2009
## 2009 0.77 -
## 2015 0.35 0.35
##
## P value adjustment method: BH
C2D_means <- C2D_filtered %>% group_by(year_new) %>% summarise(Mean = mean(C2D),SD = sd(C2D),Count = n())
print(C2D_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 3.70 1.88 356
## 2 2009 3.66 1.93 369
## 3 2015 3.48 1.85 254
C2D_filtered$predicted_C2D <- predict(C2D_Anova)
C2D_filtered$year_factor_C2D <- factor(C2D_filtered$year)
ggplot(C2D_filtered, aes(x = Age_18_graduate, y = C2D, color = year_factor_C2D)) + geom_point(aes(shape = year_factor_C2D), alpha = 0.5) + geom_line(aes(y = predicted_C2D), size = 1) + labs(title = "Worry about expense (C2D) by Age and Year",x = "Age", y = "Worry about expense (C2D)", color = "Year", shape = "Year") + theme_minimal()
ggplot(C2D_filtered, aes(x = year_factor_C2D, y = C2D, fill = year_factor_C2D)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Worry about expense (C2D) in 2005 and 2015",x = "Year", y = "Worry about expense (C2D)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(C2E)
## # A tibble: 7 × 2
## C2E n
## <dbl> <int>
## 1 1 174
## 2 2 175
## 3 3 158
## 4 4 139
## 5 5 133
## 6 6 88
## 7 7 112
C2E_filtered <- TAS_filtered_age %>% filter(C2E < 8) %>% filter(C2E >0)
C2E_Anova <- aov(C2E ~ year_new, data = C2E_filtered)
summary(C2E_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 1 0.433 0.112 0.894
## Residuals 976 3765 3.857
C2E_pairwise <-TukeyHSD(C2E_Anova)
C2E_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = C2E ~ year_new, data = C2E_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.061310557 -0.2811726 0.4037937 0.9072802
## 2015-2005 0.062505530 -0.3161388 0.4411499 0.9205857
## 2015-2009 0.001194973 -0.3746618 0.3770518 0.9999693
C2E_glht <- glht(C2E_Anova, linfct = mcp(year_new = "Tukey"))
summary(C2E_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = C2E ~ year_new, data = C2E_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.061311 0.145906 0.420 0.907
## 2015 - 2005 == 0 0.062506 0.161311 0.387 0.920
## 2015 - 2009 == 0 0.001195 0.160124 0.007 1.000
## (Adjusted p values reported -- single-step method)
C2E_pairwise_t_test <- pairwise.t.test(C2E_filtered$C2E, C2E_filtered$year_new, p.adjust.method = "BH")
print(C2E_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: C2E_filtered$C2E and C2E_filtered$year_new
##
## 2005 2009
## 2009 0.99 -
## 2015 0.99 0.99
##
## P value adjustment method: BH
C2E_means <- C2E_filtered %>% group_by(year_new) %>% summarise(Mean = mean(C2E),SD = sd(C2E),Count = n())
print(C2E_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 3.57 1.95 356
## 2 2009 3.63 1.97 369
## 3 2015 3.63 1.98 254
C2E_filtered$predicted_C2E <- predict(C2E_Anova)
C2E_filtered$year_factor_C2E <- factor(C2E_filtered$year)
ggplot(C2E_filtered, aes(x = Age_18_graduate, y = C2E, color = year_factor_C2E)) + geom_point(aes(shape = year_factor_C2E), alpha = 0.5) + geom_line(aes(y = predicted_C2E), size = 1) + labs(title = "Worry about future job (C2E) by Age and Year",x = "Age", y = "Worry about future job (C2E)", color = "Year", shape = "Year") + theme_minimal()
ggplot(C2E_filtered, aes(x = year_factor_C2E, y = C2E, fill = year_factor_C2E)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Worry about future job (C2E) in 2005 and 2015",x = "Year", y = "Worry about future job (C2E)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(C2F)
## # A tibble: 7 × 2
## C2F n
## <dbl> <int>
## 1 1 212
## 2 2 239
## 3 3 164
## 4 4 150
## 5 5 106
## 6 6 54
## 7 7 54
C2F_filtered <- TAS_filtered_age %>% filter(C2F < 8) %>% filter(C2F >0)
C2F_Anova <- aov(C2F ~ year_new, data = C2F_filtered)
summary(C2F_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 2.6 1.304 0.424 0.655
## Residuals 976 3002.3 3.076
C2F_pairwise <-TukeyHSD(C2F_Anova)
C2F_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = C2F ~ year_new, data = C2F_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.11554155 -0.1903038 0.4213869 0.6488063
## 2015-2005 0.02702822 -0.3111099 0.3651663 0.9807805
## 2015-2009 -0.08851333 -0.4241621 0.2471354 0.8097490
C2F_glht <- glht(C2F_Anova, linfct = mcp(year_new = "Tukey"))
summary(C2F_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = C2F ~ year_new, data = C2F_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.11554 0.13030 0.887 0.648
## 2015 - 2005 == 0 0.02703 0.14405 0.188 0.981
## 2015 - 2009 == 0 -0.08851 0.14299 -0.619 0.809
## (Adjusted p values reported -- single-step method)
C2F_pairwise_t_test <- pairwise.t.test(C2F_filtered$C2F, C2F_filtered$year_new, p.adjust.method = "BH")
print(C2F_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: C2F_filtered$C2F and C2F_filtered$year_new
##
## 2005 2009
## 2009 0.80 -
## 2015 0.85 0.80
##
## P value adjustment method: BH
C2F_means <- C2F_filtered %>% group_by(year_new) %>% summarise(Mean = mean(C2F),SD = sd(C2F),Count = n())
print(C2F_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 3.03 1.75 356
## 2 2009 3.14 1.77 369
## 3 2015 3.06 1.74 254
C2F_filtered$predicted_C2F <- predict(C2F_Anova)
C2F_filtered$year_factor_C2F <- factor(C2F_filtered$year)
ggplot(C2F_filtered, aes(x = Age_18_graduate, y = C2F, color = year_factor_C2F)) + geom_point(aes(shape = year_factor_C2F), alpha = 0.5) + geom_line(aes(y = predicted_C2F), size = 1) + labs(title = "Responsibility for Self (C2F) by Age and Year",x = "Age", y = "Responsibility for Self (C2F)", color = "Year", shape = "Year") + theme_minimal()
ggplot(C2F_filtered, aes(x = year_factor_C2F, y = C2F, fill = year_factor_C2F)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Responsibility for Self (C2F) in 2005 and 2015",x = "Year", y = "Responsibility for Self (C2F)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(G30A)
## # A tibble: 7 × 2
## G30A n
## <dbl> <int>
## 1 0 36
## 2 1 3
## 3 3 5
## 4 4 52
## 5 5 178
## 6 6 333
## 7 7 372
G30A_filtered <- TAS_filtered_age %>% filter(G30A < 8) %>% filter(G30A >0)
G30A_Anova <- aov(G30A ~ year_new, data = G30A_filtered)
summary(G30A_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 0.3 0.1491 0.16 0.852
## Residuals 940 873.9 0.9297
G30A_pairwise <-TukeyHSD(G30A_Anova)
G30A_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = G30A ~ year_new, data = G30A_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 -0.01642954 -0.1893221 0.1564630 0.9729423
## 2015-2005 0.02805118 -0.1621525 0.2182549 0.9360776
## 2015-2009 0.04448072 -0.1400501 0.2290115 0.8383074
G30A_glht <- glht(G30A_Anova, linfct = mcp(year_new = "Tukey"))
summary(G30A_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = G30A ~ year_new, data = G30A_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.01643 0.07365 -0.223 0.973
## 2015 - 2005 == 0 0.02805 0.08103 0.346 0.936
## 2015 - 2009 == 0 0.04448 0.07861 0.566 0.838
## (Adjusted p values reported -- single-step method)
G30A_pairwise_t_test <- pairwise.t.test(G30A_filtered$G30A, G30A_filtered$year_new, p.adjust.method = "BH")
print(G30A_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: G30A_filtered$G30A and G30A_filtered$year_new
##
## 2005 2009
## 2009 0.82 -
## 2015 0.82 0.82
##
## P value adjustment method: BH
G30A_means <- G30A_filtered %>% group_by(year_new) %>% summarise(Mean = mean(G30A),SD = sd(G30A),Count = n())
print(G30A_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 6.06 0.948 320
## 2 2009 6.05 1.00 369
## 3 2015 6.09 0.930 254
G30A_filtered$predicted_G30A <- predict(G30A_Anova)
G30A_filtered$year_factor_G30A <- factor(G30A_filtered$year)
ggplot(G30A_filtered, aes(x = Age_18_graduate, y = G30A, color = year_factor_G30A)) + geom_point(aes(shape = year_factor_G30A), alpha = 0.5) + geom_line(aes(y = predicted_G30A), size = 1) + labs(title = "Likelihood of well-paying job (G30A) by Age and Year",x = "Age", y = "Likelihood of well-paying job (G30A)", color = "Year", shape = "Year") + theme_minimal()
ggplot(G30A_filtered, aes(x = year_factor_G30A, y = G30A, fill = year_factor_G30A)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Likelihood of well-paying job (G30A) in 2005 and 2015",x = "Year", y = "Likelihood of well-paying job (G30A)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(G41A)
## # A tibble: 8 × 2
## G41A n
## <dbl> <int>
## 1 1 38
## 2 2 36
## 3 3 76
## 4 4 116
## 5 5 239
## 6 6 199
## 7 7 274
## 8 9 1
G41A_filtered <- TAS_filtered_age %>% filter(G41A < 8) %>% filter(G41A >0)
G41A_Anova <- aov(G41A ~ year_new, data = G41A_filtered)
summary(G41A_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 42.2 21.091 8.046 0.000342 ***
## Residuals 975 2555.8 2.621
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
G41A_pairwise <-TukeyHSD(G41A_Anova)
G41A_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = G41A ~ year_new, data = G41A_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 -0.03083781 -0.3133562 0.2516806 0.9644608
## 2015-2005 -0.48834380 -0.8004838 -0.1762038 0.0007404
## 2015-2009 -0.45750599 -0.7675196 -0.1474923 0.0016083
G41A_glht <- glht(G41A_Anova, linfct = mcp(year_new = "Tukey"))
summary(G41A_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = G41A ~ year_new, data = G41A_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.03084 0.12036 -0.256 0.964399
## 2015 - 2005 == 0 -0.48834 0.13298 -3.672 0.000721 ***
## 2015 - 2009 == 0 -0.45751 0.13207 -3.464 0.001589 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
G41A_pairwise_t_test <- pairwise.t.test(G41A_filtered$G41A, G41A_filtered$year_new, p.adjust.method = "BH")
print(G41A_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: G41A_filtered$G41A and G41A_filtered$year_new
##
## 2005 2009
## 2009 0.79784 -
## 2015 0.00076 0.00083
##
## P value adjustment method: BH
G41A_means <- G41A_filtered %>% group_by(year_new) %>% summarise(Mean = mean(G41A),SD = sd(G41A),Count = n())
print(G41A_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 5.36 1.63 356
## 2 2009 5.33 1.53 368
## 3 2015 4.87 1.73 254
G41A_filtered$predicted_G41A <- predict(G41A_Anova)
G41A_filtered$year_factor_G41A <- factor(G41A_filtered$year)
ggplot(G41A_filtered, aes(x = Age_18_graduate, y = G41A, color = year_factor_G41A)) + geom_point(aes(shape = year_factor_G41A), alpha = 0.5) + geom_line(aes(y = predicted_G41A), size = 1) + labs(title = "Importance of job status (G41A) by Age and Year",x = "Age", y = "Importance of job status (G41A)", color = "Year", shape = "Year") + theme_minimal()
ggplot(G41A_filtered, aes(x = year_factor_G41A, y = G41A, fill = year_factor_G41A)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of job status (G41A) in 2005 and 2015",x = "Year", y = "Importance of job status (G41A)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(G41B)
## # A tibble: 7 × 2
## G41B n
## <dbl> <int>
## 1 1 9
## 2 2 8
## 3 3 27
## 4 4 81
## 5 5 261
## 6 6 300
## 7 7 293
G41B_filtered <- TAS_filtered_age %>% filter(G41B < 8) %>% filter(G41B >0)
G41B_Anova <- aov(G41B ~ year_new, data = G41B_filtered)
summary(G41B_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 7.6 3.779 2.67 0.0698 .
## Residuals 976 1381.7 1.416
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
G41B_pairwise <-TukeyHSD(G41B_Anova)
G41B_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = G41B ~ year_new, data = G41B_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.05566974 -0.1518130 0.263152525 0.8037614
## 2015-2005 -0.16453596 -0.3939259 0.064853942 0.2119589
## 2015-2009 -0.22020571 -0.4479069 0.007495445 0.0605596
G41B_glht <- glht(G41B_Anova, linfct = mcp(year_new = "Tukey"))
summary(G41B_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = G41B ~ year_new, data = G41B_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.05567 0.08839 0.630 0.8035
## 2015 - 2005 == 0 -0.16454 0.09773 -1.684 0.2116
## 2015 - 2009 == 0 -0.22021 0.09701 -2.270 0.0604 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
G41B_pairwise_t_test <- pairwise.t.test(G41B_filtered$G41B, G41B_filtered$year_new, p.adjust.method = "BH")
print(G41B_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: G41B_filtered$G41B and G41B_filtered$year_new
##
## 2005 2009
## 2009 0.53 -
## 2015 0.14 0.07
##
## P value adjustment method: BH
G41B_means <- G41B_filtered %>% group_by(year_new) %>% summarise(Mean = mean(G41B),SD = sd(G41B),Count = n())
print(G41B_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 5.73 1.09 356
## 2 2009 5.78 1.20 369
## 3 2015 5.56 1.31 254
G41B_filtered$predicted_G41B <- predict(G41B_Anova)
G41B_filtered$year_factor_G41B <- factor(G41B_filtered$year)
ggplot(G41B_filtered, aes(x = Age_18_graduate, y = G41B, color = year_factor_G41B)) + geom_point(aes(shape = year_factor_G41B), alpha = 0.5) + geom_line(aes(y = predicted_G41B), size = 1) + labs(title = "Importance of decision-making (G41B) by Age and Year",x = "Age", y = "Importance of decision-making (G41B)", color = "Year", shape = "Year") + theme_minimal()
ggplot(G41B_filtered, aes(x = year_factor_G41B, y = G41B, fill = year_factor_G41B)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of decision-making (G41B) in 2005 and 2015",x = "Year", y = "Importance of decision-making (G41B)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(G41C)
## # A tibble: 8 × 2
## G41C n
## <dbl> <int>
## 1 0 1
## 2 1 5
## 3 2 11
## 4 3 35
## 5 4 120
## 6 5 292
## 7 6 286
## 8 7 229
G41C_filtered <- TAS_filtered_age %>% filter(G41C < 8) %>% filter(G41C >0)
G41C_Anova <- aov(G41C ~ year_new, data = G41C_filtered)
summary(G41C_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 2.6 1.286 0.908 0.404
## Residuals 975 1381.8 1.417
G41C_pairwise <-TukeyHSD(G41C_Anova)
G41C_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = G41C ~ year_new, data = G41C_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.04131269 -0.1662814 0.24890681 0.8867127
## 2015-2005 -0.08883288 -0.3186104 0.14094468 0.6356998
## 2015-2009 -0.13014557 -0.3582354 0.09794429 0.3736893
G41C_glht <- glht(G41C_Anova, linfct = mcp(year_new = "Tukey"))
summary(G41C_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = G41C ~ year_new, data = G41C_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.04131 0.08844 0.467 0.887
## 2015 - 2005 == 0 -0.08883 0.09789 -0.907 0.635
## 2015 - 2009 == 0 -0.13015 0.09717 -1.339 0.373
## (Adjusted p values reported -- single-step method)
G41C_pairwise_t_test <- pairwise.t.test(G41C_filtered$G41C, G41C_filtered$year_new, p.adjust.method = "BH")
print(G41C_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: G41C_filtered$G41C and G41C_filtered$year_new
##
## 2005 2009
## 2009 0.64 -
## 2015 0.55 0.54
##
## P value adjustment method: BH
G41C_means <- G41C_filtered %>% group_by(year_new) %>% summarise(Mean = mean(G41C),SD = sd(G41C),Count = n())
print(G41C_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 5.52 1.11 356
## 2 2009 5.56 1.21 369
## 3 2015 5.43 1.27 253
G41C_filtered$predicted_G41C <- predict(G41C_Anova)
G41C_filtered$year_factor_G41C <- factor(G41C_filtered$year)
ggplot(G41C_filtered, aes(x = Age_18_graduate, y = G41C, color = year_factor_G41C)) + geom_point(aes(shape = year_factor_G41C), alpha = 0.5) + geom_line(aes(y = predicted_G41C), size = 1) + labs(title = "Importance of challenging work (G41C) by Age and Year",x = "Age", y = "Importance of challenging work (G41C)", color = "Year", shape = "Year") + theme_minimal()
ggplot(G41C_filtered, aes(x = year_factor_G41C, y = G41C, fill = year_factor_G41C)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of challenging work (G41C) in 2005 and 2015",x = "Year", y = "Importance of challenging work (G41C)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(G41H)
## # A tibble: 7 × 2
## G41H n
## <dbl> <int>
## 1 1 3
## 2 2 5
## 3 3 9
## 4 4 31
## 5 5 111
## 6 6 229
## 7 7 591
G41H_filtered <- TAS_filtered_age %>% filter(G41H < 8) %>% filter(G41H >0)
G41H_Anova <- aov(G41H ~ year_new, data = G41H_filtered)
summary(G41H_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 7.0 3.493 3.683 0.0255 *
## Residuals 976 925.6 0.948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
G41H_pairwise <-TukeyHSD(G41H_Anova)
G41H_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = G41H ~ year_new, data = G41H_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.0431549 -0.1266595 0.21296929 0.8220175
## 2015-2005 -0.1660400 -0.3537843 0.02170430 0.0953807
## 2015-2009 -0.2091949 -0.3955570 -0.02283276 0.0232321
G41H_glht <- glht(G41H_Anova, linfct = mcp(year_new = "Tukey"))
summary(G41H_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = G41H ~ year_new, data = G41H_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.04315 0.07234 0.597 0.8217
## 2015 - 2005 == 0 -0.16604 0.07998 -2.076 0.0951 .
## 2015 - 2009 == 0 -0.20919 0.07939 -2.635 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
G41H_pairwise_t_test <- pairwise.t.test(G41H_filtered$G41H, G41H_filtered$year_new, p.adjust.method = "BH")
print(G41H_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: G41H_filtered$G41H and G41H_filtered$year_new
##
## 2005 2009
## 2009 0.551 -
## 2015 0.057 0.026
##
## P value adjustment method: BH
G41H_means <- G41H_filtered %>% group_by(year_new) %>% summarise(Mean = mean(G41H),SD = sd(G41H),Count = n())
print(G41H_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 6.39 0.939 356
## 2 2009 6.43 0.965 369
## 3 2015 6.22 1.03 254
G41H_filtered$predicted_G41H <- predict(G41H_Anova)
G41H_filtered$year_factor_G41H <- factor(G41H_filtered$year)
ggplot(G41H_filtered, aes(x = Age_18_graduate, y = G41H, color = year_factor_G41H)) + geom_point(aes(shape = year_factor_G41H), alpha = 0.5) + geom_line(aes(y = predicted_G41H), size = 1) + labs(title = "Importance of healthcare benefits (G41H) by Age and Year",x = "Age", y = "Importance of healthcare benefits (G41H)", color = "Year", shape = "Year") + theme_minimal()
ggplot(G41H_filtered, aes(x = year_factor_G41H, y = G41H, fill = year_factor_G41H)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of healthcare benefits (G41H) in 2005 and 2015",x = "Year", y = "Importance of healthcare benefits (G41H)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(G41P)
## # A tibble: 9 × 2
## G41P n
## <dbl> <int>
## 1 1 31
## 2 2 48
## 3 3 71
## 4 4 174
## 5 5 245
## 6 6 205
## 7 7 202
## 8 8 2
## 9 9 1
G41P_filtered <- TAS_filtered_age %>% filter(G41P < 8) %>% filter(G41P >0)
G41P_Anova <- aov(G41P ~ year_new, data = G41P_filtered)
summary(G41P_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 8.1 4.043 1.646 0.193
## Residuals 973 2390.3 2.457
G41P_pairwise <-TukeyHSD(G41P_Anova)
G41P_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = G41P ~ year_new, data = G41P_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 0.1167978 -0.1570803 0.39067590 0.5763770
## 2015-2005 -0.1136298 -0.4159823 0.18872267 0.6517076
## 2015-2009 -0.2304276 -0.5307114 0.06985616 0.1696895
G41P_glht <- glht(G41P_Anova, linfct = mcp(year_new = "Tukey"))
summary(G41P_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = G41P ~ year_new, data = G41P_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 0.1168 0.1167 1.001 0.576
## 2015 - 2005 == 0 -0.1136 0.1288 -0.882 0.651
## 2015 - 2009 == 0 -0.2304 0.1279 -1.801 0.169
## (Adjusted p values reported -- single-step method)
G41P_pairwise_t_test <- pairwise.t.test(G41P_filtered$G41P, G41P_filtered$year_new, p.adjust.method = "BH")
print(G41P_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: G41P_filtered$G41P and G41P_filtered$year_new
##
## 2005 2009
## 2009 0.38 -
## 2015 0.38 0.22
##
## P value adjustment method: BH
G41P_means <- G41P_filtered %>% group_by(year_new) %>% summarise(Mean = mean(G41P),SD = sd(G41P),Count = n())
print(G41P_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 5.01 1.54 355
## 2 2009 5.13 1.53 367
## 3 2015 4.90 1.65 254
G41P_filtered$predicted_G41P <- predict(G41P_Anova)
G41P_filtered$year_factor_G41P <- factor(G41P_filtered$year)
ggplot(G41P_filtered, aes(x = Age_18_graduate, y = G41P, color = year_factor_G41P)) + geom_point(aes(shape = year_factor_G41P), alpha = 0.5) + geom_line(aes(y = predicted_G41P), size = 1) + labs(title = "Importance of job central to identity (G41P) by Age and Year",x = "Age", y = "Importance of job central to identity (G41P)", color = "Year", shape = "Year") + theme_minimal()
ggplot(G41P_filtered, aes(x = year_factor_G41P, y = G41P, fill = year_factor_G41P)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of Importance of job central to identity (G41P) in 2005 and 2015",x = "Year", y = "Importance of job central to identity (G41P)", fill = "Year") + theme_minimal()
TAS_filtered_age %>% count(H1)
## # A tibble: 5 × 2
## H1 n
## <dbl> <int>
## 1 1 264
## 2 2 411
## 3 3 229
## 4 4 70
## 5 5 5
H1_filtered <- TAS_filtered_age %>% filter(H1 < 8) %>% filter(H1 >0)
H1_Anova <- aov(H1 ~ year_new, data = H1_filtered)
summary(H1_Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## year_new 2 1.0 0.4774 0.581 0.56
## Residuals 976 802.3 0.8221
H1_pairwise <-TukeyHSD(H1_Anova)
H1_pairwise
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = H1 ~ year_new, data = H1_filtered)
##
## $year_new
## diff lwr upr p adj
## 2009-2005 -0.04249262 -0.20059941 0.1156142 0.8031744
## 2015-2005 0.03614085 -0.13865969 0.2109414 0.8782957
## 2015-2009 0.07863346 -0.09488021 0.2521471 0.5368712
H1_glht <- glht(H1_Anova, linfct = mcp(year_new = "Tukey"))
summary(H1_glht)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = H1 ~ year_new, data = H1_filtered)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2009 - 2005 == 0 -0.04249 0.06736 -0.631 0.803
## 2015 - 2005 == 0 0.03614 0.07447 0.485 0.878
## 2015 - 2009 == 0 0.07863 0.07392 1.064 0.536
## (Adjusted p values reported -- single-step method)
H1_pairwise_t_test <- pairwise.t.test(H1_filtered$H1, H1_filtered$year_new, p.adjust.method = "BH")
print(H1_pairwise_t_test)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: H1_filtered$H1 and H1_filtered$year_new
##
## 2005 2009
## 2009 0.63 -
## 2015 0.63 0.63
##
## P value adjustment method: BH
H1_means <- H1_filtered %>% group_by(year_new) %>% summarise(Mean = mean(H1),SD = sd(H1),Count = n())
print(H1_means)
## # A tibble: 3 × 4
## year_new Mean SD Count
## <fct> <dbl> <dbl> <int>
## 1 2005 2.13 0.907 356
## 2 2009 2.09 0.880 369
## 3 2015 2.17 0.943 254
H1_filtered$predicted_H1 <- predict(H1_Anova)
H1_filtered$year_factor_H1 <- factor(H1_filtered$year)
ggplot(H1_filtered, aes(x = Age_18_graduate, y = H1, color = year_factor_H1)) + geom_point(aes(shape = year_factor_H1), alpha = 0.5) + geom_line(aes(y = predicted_H1), size = 1) + labs(title = "General Health (H1) by Age and Year",x = "Age", y = "General Health (H1)", color = "Year", shape = "Year") + theme_minimal()
ggplot(H1_filtered, aes(x = year_factor_H1, y = H1, fill = year_factor_H1)) + geom_boxplot() + stat_summary(fun = "mean", geom = "crossbar", width = 0.75, color = "black", size = 0.2, linetype = "dashed") + labs(title = "Box Plot of General Health (H1) in 2005 and 2015",x = "Year", y = "General Health (H1)", fill = "Year") + theme_minimal()