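We will be going through a comparison of the 2005 and 2015 TAS responses, item by item, using Welch two-sample t-tests and boxplots.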
library(tidyverse)
library(readxl)
library(hrbrthemes)
library(viridis)
# Read the TAS workbook into a tibble; every later section selects its
# columns from this object by position.
TAS_original_data <- read_excel(
  path = "C:/ZZ_SherMay/BHP/TAS_original_data.xlsx"
)
# Open the data in the viewer, then print the first rows to the console.
view(TAS_original_data)
head(TAS_original_data, n = 6)
## # A tibble: 6 × 75
## TAS TAS05 TAS15 ER30001 ER30002 ER32000 ER32006 ER33801 ER33802 ER33803
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NA 1 4 39 2 2 289 3 60
## 2 1 NA 1 4 41 2 2 1157 3 30
## 3 1 1 NA 4 180 2 3 771 2 22
## 4 1 1 NA 5 32 2 2 624 3 30
## 5 1 NA 1 5 33 1 2 1504 3 30
## 6 1 1 NA 6 34 1 2 1202 51 30
## # ℹ 65 more variables: TA050001 <dbl>, TA050002 <dbl>, TA050003 <dbl>,
## # TA050004 <dbl>, TA050044 <dbl>, TA050047 <dbl>, TA050050 <dbl>,
## # TA050065 <dbl>, TA050066 <dbl>, TA050067 <dbl>, TA050070 <dbl>,
## # TA050071 <dbl>, TA050127 <dbl>, TA050128 <dbl>, TA050129 <dbl>,
## # TA050130 <dbl>, TA050573 <dbl>, TA050574 <dbl>, TA050575 <dbl>,
## # TA050594 <dbl>, TA050595 <dbl>, TA050639 <dbl>, TA050663 <dbl>,
## # TA050664 <dbl>, TA050665 <dbl>, TA050670 <dbl>, TA050675 <dbl>, …
# B5A ("Responsibility for self"): 2005 vs 2015.
# 2005 responses: column 15 with NA rows removed.
B5A_2005 <- drop_na(TAS_original_data[, 15])
# 2015 responses: column 49 with NA rows removed, keeping only values
# below 8 (presumably 8+ are nonresponse codes -- confirm in the codebook).
B5A_2015 <- TAS_original_data[, 49] %>%
  drop_na() %>%
  filter(TA150045 < 8)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
B5A_t_test <- t.test(B5A_2005, B5A_2015, paired = FALSE)
print(B5A_t_test)
##
## Welch Two Sample t-test
##
## data: B5A_2005 and B5A_2015
## t = -9.4892, df = 1393.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.5822920 -0.3827857
## sample estimates:
## mean of x mean of y
## 3.598658 4.081197
# bind_rows() keeps the two differently named columns side by side, so
# boxplot() draws one horizontal box per wave.
boxplot(
  dplyr::bind_rows(B5A_2005, B5A_2015),
  main = "B5A: Responsibility for self",
  xlab = "B5A",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# B5D ("Managing own money"): 2005 vs 2015.
# 2005 responses: column 16 with NA rows removed.
B5D_2005 <- drop_na(TAS_original_data[, 16])
# 2015 responses: column 50 with NA rows removed, keeping only values
# below 8 (presumably 8+ are nonresponse codes -- confirm in the codebook).
B5D_2015 <- TAS_original_data[, 50] %>%
  drop_na() %>%
  filter(TA150048 < 8)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
t_test_result <- t.test(B5D_2005, B5D_2015, paired = FALSE)
print(t_test_result)
##
## Welch Two Sample t-test
##
## data: B5D_2005 and B5D_2015
## t = -5.2023, df = 1308.4, p-value = 2.283e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2861747 -0.1294445
## sample estimates:
## mean of x mean of y
## 4.391946 4.599756
# One horizontal box per wave (the two columns have different names, so
# bind_rows() keeps them as separate columns).
boxplot(
  dplyr::bind_rows(B5D_2005, B5D_2015),
  main = "B5D: Managing own money",
  xlab = "B5D",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# B6C ("Money management skills"): 2005 vs 2015.
# Each wave is a single column (17 for 2005, 51 for 2015) with NA rows
# removed; no value filter is applied to either wave.
B6C_2005 <- drop_na(TAS_original_data[, 17])
B6C_2015 <- drop_na(TAS_original_data[, 51])
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
B6C_t_test <- t.test(B6C_2005, B6C_2015, paired = FALSE)
print(B6C_t_test)
##
## Welch Two Sample t-test
##
## data: B6C_2005 and B6C_2015
## t = -1.2437, df = 1284.8, p-value = 0.2138
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.19994430 0.04479255
## sample estimates:
## mean of x mean of y
## 5.348993 5.426569
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(B6C_2005, B6C_2015),
  main = "B6C: Money management skills",
  xlab = "B6C",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# C2D ("Worry about expenses"): 2005 vs 2015.
# Each wave is a single column (18 for 2005, 52 for 2015) with NA rows
# removed; no value filter is applied to either wave.
C2D_2005 <- drop_na(TAS_original_data[, 18])
C2D_2015 <- drop_na(TAS_original_data[, 52])
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
C2D_t_test <- t.test(C2D_2005, C2D_2015, paired = FALSE)
print(C2D_t_test)
##
## Welch Two Sample t-test
##
## data: C2D_2005 and C2D_2015
## t = 1.5136, df = 1432.6, p-value = 0.1303
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03749392 0.29083756
## sample estimates:
## mean of x mean of y
## 3.734228 3.607556
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(C2D_2005, C2D_2015),
  main = "C2D: Worry about expenses",
  xlab = "C2D",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# C2E ("Worry about future job"): 2005 vs 2015.
# Each wave is a single column (19 for 2005, 53 for 2015) with NA rows
# removed; no value filter is applied to either wave.
C2E_2005 <- drop_na(TAS_original_data[, 19])
C2E_2015 <- drop_na(TAS_original_data[, 53])
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
C2E_t_test <- t.test(C2E_2005, C2E_2015, paired = FALSE)
print(C2E_t_test)
##
## Welch Two Sample t-test
##
## data: C2E_2005 and C2E_2015
## t = 1.4229, df = 1410.4, p-value = 0.155
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04721313 0.29662236
## sample estimates:
## mean of x mean of y
## 3.620134 3.495430
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(C2E_2005, C2E_2015),
  main = "C2E: Worry about future job",
  xlab = "C2E",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# C2F ("Discouraged about future"): 2005 vs 2015.
# Both waves: take the single column (20 for 2005, 54 for 2015), remove NA
# rows, and keep only values of 7 or less (presumably higher values are
# nonresponse codes -- confirm in the codebook).
C2F_2005 <- TAS_original_data[, 20] %>%
  drop_na() %>%
  filter(TA050067 <= 7)
C2F_2015 <- TAS_original_data[, 54] %>%
  drop_na() %>%
  filter(TA150068 <= 7)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
C2F_t_test <- t.test(C2F_2005, C2F_2015, paired = FALSE)
print(C2F_t_test)
##
## Welch Two Sample t-test
##
## data: C2F_2005 and C2F_2015
## t = -0.15541, df = 1395.6, p-value = 0.8765
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1703954 0.1453786
## sample estimates:
## mean of x mean of y
## 3.074024 3.086533
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(C2F_2005, C2F_2015),
  main = "C2F: Discouraged about future",
  xlab = "C2F",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# G30A ("Likelihood of well-paying job"): 2005 vs 2015.
# 2005 responses: column 32, NA rows removed, strictly positive values only.
G30A_2005 <- TAS_original_data[, 32] %>%
  drop_na() %>%
  filter(TA050639 > 0)
# 2015 responses: column 66, NA rows removed, values strictly between 0 and 9
# (presumably 0 and 9 are inapplicable/nonresponse codes -- confirm in the
# codebook).
G30A_2015 <- TAS_original_data[, 66] %>%
  drop_na() %>%
  filter(TA150784 > 0, TA150784 < 9)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
G30A_t_test <- t.test(G30A_2005, G30A_2015, paired = FALSE)
print(G30A_t_test)
##
## Welch Two Sample t-test
##
## data: G30A_2005 and G30A_2015
## t = -0.875, df = 1306.2, p-value = 0.3817
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.13542017 0.05188025
## sample estimates:
## mean of x mean of y
## 5.998498 6.040268
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(G30A_2005, G30A_2015),
  main = "G30A: Likelihood of well-paying job",
  xlab = "G30A",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# G41A ("Importance of job status"): 2005 vs 2015.
# 2005 responses: column 33 with NA rows removed.
G41A_2005 <- drop_na(TAS_original_data[, 33])
# 2015 responses: column 67 with NA rows removed, keeping only values
# below 8 (presumably 8+ are nonresponse codes -- confirm in the codebook).
G41A_2015 <- TAS_original_data[, 67] %>%
  drop_na() %>%
  filter(TA150808 < 8)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
G41A_t_test <- t.test(G41A_2005, G41A_2015, paired = FALSE)
print(G41A_t_test)
##
## Welch Two Sample t-test
##
## data: G41A_2005 and G41A_2015
## t = 7.3884, df = 1637.4, p-value = 2.357e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4074641 0.7019929
## sample estimates:
## mean of x mean of y
## 5.363758 4.809030
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(G41A_2005, G41A_2015),
  main = "G41A: Importance of job status",
  xlab = "G41A",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# G41B ("Importance of decision-making"): 2005 vs 2015.
# 2005 responses: column 34 with NA rows removed.
G41B_2005 <- drop_na(TAS_original_data[, 34])
# 2015 responses: column 68 with NA rows removed, keeping only values
# below 8 (presumably 8+ are nonresponse codes -- confirm in the codebook).
G41B_2015 <- TAS_original_data[, 68] %>%
  drop_na() %>%
  filter(TA150809 < 8)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
G41B_t_test <- t.test(G41B_2005, G41B_2015, paired = FALSE)
print(G41B_t_test)
##
## Welch Two Sample t-test
##
## data: G41B_2005 and G41B_2015
## t = 3.3848, df = 1688.7, p-value = 0.0007285
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0764623 0.2871741
## sample estimates:
## mean of x mean of y
## 5.778523 5.596705
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(G41B_2005, G41B_2015),
  main = "G41B: Importance of decision-making",
  xlab = "G41B",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# G41C ("Importance of challenging work"): 2005 vs 2015.
# 2005 responses: column 35 (TA050665) with NA rows removed.
# This previously selected column 34, which is the G41B item, so the test
# compared 2005 G41B against 2015 G41C. Throughout this script the 2015 item
# sits 34 columns after its 2005 counterpart (32 -> 66, 33 -> 67, 34 -> 68,
# 37 -> 71, 38 -> 72), so the partner of column 69 is column 35.
G41C_2005 <- TAS_original_data[, c(35)] %>% drop_na()
# 2015 responses: column 69 with NA rows removed, keeping values strictly
# between 0 and 8 (presumably 0 and 8+ are inapplicable/nonresponse codes --
# confirm in the codebook).
G41C_2015 <- TAS_original_data[, c(69)] %>%
  drop_na() %>%
  filter(TA150810 > 0) %>%
  filter(TA150810 < 8)
# Unpaired (Welch) two-sample t-test between the waves.
G41C_t_test <- t.test(G41C_2005, G41C_2015, paired = FALSE)
print(G41C_t_test)
# NOTE: the output recorded below was produced with the wrong 2005 column
# (34, the G41B item -- its "mean of x" equals G41B's 5.778523). It is stale
# and must be regenerated by re-running this section.
##
## Welch Two Sample t-test
##
## data: G41C_2005 and G41C_2015
## t = 5.0926, df = 1716.1, p-value = 3.921e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1774609 0.3997793
## sample estimates:
## mean of x mean of y
## 5.778523 5.489903
# One horizontal box per wave (the two columns have different names, so
# bind_rows() keeps them as separate columns).
boxplot(dplyr::bind_rows(G41C_2005, G41C_2015), main = "G41C: Importance of challenging work", xlab = "G41C", ylab = "Counts", names=c("2005", "2015"), horizontal = TRUE)
# G41P ("Importance of job central to identity"): 2005 vs 2015.
# Both waves: take the single column (37 for 2005, 71 for 2015), remove NA
# rows, and keep only values below 8 (presumably 8+ are nonresponse codes --
# confirm in the codebook).
G41P_2005 <- TAS_original_data[, 37] %>%
  drop_na() %>%
  filter(TA050675 < 8)
G41P_2015 <- TAS_original_data[, 71] %>%
  drop_na() %>%
  filter(TA150820 < 8)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
G41P_t_test <- t.test(G41P_2005, G41P_2015, paired = FALSE)
print(G41P_t_test)
##
## Welch Two Sample t-test
##
## data: G41P_2005 and G41P_2015
## t = 1.9078, df = 1537.8, p-value = 0.0566
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.003760186 0.271061275
## sample estimates:
## mean of x mean of y
## 5.070081 4.936430
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(G41P_2005, G41P_2015),
  main = "G41P: Importance of job central to identity",
  xlab = "G41P",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)
# H1 ("General Health"): 2005 vs 2015.
# Both waves: take the single column (38 for 2005, 72 for 2015), remove NA
# rows, and keep only values below 8 (presumably 8+ are nonresponse codes --
# confirm in the codebook).
H1_2005 <- TAS_original_data[, 38] %>%
  drop_na() %>%
  filter(TA050676 < 8)
H1_2015 <- TAS_original_data[, 72] %>%
  drop_na() %>%
  filter(TA150821 < 8)
# Unpaired (Welch) two-sample t-test between the waves; output recorded below.
H1_t_test <- t.test(H1_2005, H1_2015, paired = FALSE)
print(H1_t_test)
##
## Welch Two Sample t-test
##
## data: H1_2005 and H1_2015
## t = -2.435, df = 1455, p-value = 0.01501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.18169902 -0.01956443
## sample estimates:
## mean of x mean of y
## 2.196501 2.297132
# One horizontal box per wave.
boxplot(
  dplyr::bind_rows(H1_2005, H1_2015),
  main = "H1: General Health",
  xlab = "H1",
  ylab = "Counts",
  names = c("2005", "2015"),
  horizontal = TRUE
)