TAS Descriptive statistics

We will be going through the following steps.

Step 1: Loading Packages

library(tidyverse)
library(readxl)
library(hrbrthemes)
library(viridis)

Step 2: Import the data

# Load the TAS workbook into `TAS_original_data`.
# NOTE(review): the path is absolute and machine-specific; anyone else running
# this script has to adjust it.
TAS_original_data <- read_excel(
  path = "C:/ZZ_SherMay/BHP/TAS_original_data.xlsx"
)

Step 3: Preview the data

# Open the full table in the data viewer, then print its first six rows.
view(TAS_original_data)
head(TAS_original_data, n = 6L)
## # A tibble: 6 × 75
##     TAS TAS05 TAS15 ER30001 ER30002 ER32000 ER32006 ER33801 ER33802 ER33803
##   <dbl> <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1     1    NA     1       4      39       2       2     289       3      60
## 2     1    NA     1       4      41       2       2    1157       3      30
## 3     1     1    NA       4     180       2       3     771       2      22
## 4     1     1    NA       5      32       2       2     624       3      30
## 5     1    NA     1       5      33       1       2    1504       3      30
## 6     1     1    NA       6      34       1       2    1202      51      30
## # ℹ 65 more variables: TA050001 <dbl>, TA050002 <dbl>, TA050003 <dbl>,
## #   TA050004 <dbl>, TA050044 <dbl>, TA050047 <dbl>, TA050050 <dbl>,
## #   TA050065 <dbl>, TA050066 <dbl>, TA050067 <dbl>, TA050070 <dbl>,
## #   TA050071 <dbl>, TA050127 <dbl>, TA050128 <dbl>, TA050129 <dbl>,
## #   TA050130 <dbl>, TA050573 <dbl>, TA050574 <dbl>, TA050575 <dbl>,
## #   TA050594 <dbl>, TA050595 <dbl>, TA050639 <dbl>, TA050663 <dbl>,
## #   TA050664 <dbl>, TA050665 <dbl>, TA050670 <dbl>, TA050675 <dbl>, …

T-test

2005 vs 2015

B5A: Responsibility for self

# 2005 wave: column 15, rows with a missing value removed.
B5A_2005 <- TAS_original_data %>%
  select(15) %>%
  drop_na()
# 2015 wave: column 49 (TA150045), missing values removed and only values
# below 8 kept (presumably 8+ are non-substantive codes; confirm in codebook).
B5A_2015 <- TAS_original_data %>%
  select(49) %>%
  drop_na() %>%
  filter(TA150045 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
B5A_t_test <- t.test(B5A_2005, B5A_2015, paired = FALSE)
print(B5A_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  B5A_2005 and B5A_2015
## t = -9.4892, df = 1393.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5822920 -0.3827857
## sample estimates:
## mean of x mean of y 
##  3.598658  4.081197
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(B5A_2005[[1]], B5A_2015[[1]]),
  main = "B5A: Responsibility for self",
  xlab = "B5A",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

B5D: Managing own money

# 2005 wave: column 16, rows with a missing value removed.
B5D_2005 <- TAS_original_data %>%
  select(16) %>%
  drop_na()
# 2015 wave: column 50 (TA150048), missing values removed and only values
# below 8 kept (presumably 8+ are non-substantive codes; confirm in codebook).
B5D_2015 <- TAS_original_data %>%
  select(50) %>%
  drop_na() %>%
  filter(TA150048 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
# NOTE(review): the other sections name this object `<item>_t_test`.
t_test_result <- t.test(B5D_2005, B5D_2015, paired = FALSE)
print(t_test_result)
## 
##  Welch Two Sample t-test
## 
## data:  B5D_2005 and B5D_2015
## t = -5.2023, df = 1308.4, p-value = 2.283e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2861747 -0.1294445
## sample estimates:
## mean of x mean of y 
##  4.391946  4.599756
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(B5D_2005[[1]], B5D_2015[[1]]),
  main = "B5D: Managing own money",
  xlab = "B5D",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

B6C: Money management skills

# 2005 wave: column 17, rows with a missing value removed.
B6C_2005 <- TAS_original_data %>%
  select(17) %>%
  drop_na()
# 2015 wave: column 51, rows with a missing value removed.
B6C_2015 <- TAS_original_data %>%
  select(51) %>%
  drop_na()
# Unpaired two-sample (Welch) t-test between the two waves.
B6C_t_test <- t.test(B6C_2005, B6C_2015, paired = FALSE)
print(B6C_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  B6C_2005 and B6C_2015
## t = -1.2437, df = 1284.8, p-value = 0.2138
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.19994430  0.04479255
## sample estimates:
## mean of x mean of y 
##  5.348993  5.426569
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(B6C_2005[[1]], B6C_2015[[1]]),
  main = "B6C: Money management skills",
  xlab = "B6C",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

C2D: Worry about expenses

# 2005 wave: column 18, rows with a missing value removed.
C2D_2005 <- TAS_original_data %>%
  select(18) %>%
  drop_na()
# 2015 wave: column 52, rows with a missing value removed.
C2D_2015 <- TAS_original_data %>%
  select(52) %>%
  drop_na()
# Unpaired two-sample (Welch) t-test between the two waves.
C2D_t_test <- t.test(C2D_2005, C2D_2015, paired = FALSE)
print(C2D_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  C2D_2005 and C2D_2015
## t = 1.5136, df = 1432.6, p-value = 0.1303
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03749392  0.29083756
## sample estimates:
## mean of x mean of y 
##  3.734228  3.607556
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(C2D_2005[[1]], C2D_2015[[1]]),
  main = "C2D: Worry about expenses",
  xlab = "C2D",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

C2E: Worry about future job

# 2005 wave: column 19, rows with a missing value removed.
C2E_2005 <- TAS_original_data %>%
  select(19) %>%
  drop_na()
# 2015 wave: column 53, rows with a missing value removed.
C2E_2015 <- TAS_original_data %>%
  select(53) %>%
  drop_na()
# Unpaired two-sample (Welch) t-test between the two waves.
C2E_t_test <- t.test(C2E_2005, C2E_2015, paired = FALSE)
print(C2E_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  C2E_2005 and C2E_2015
## t = 1.4229, df = 1410.4, p-value = 0.155
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04721313  0.29662236
## sample estimates:
## mean of x mean of y 
##  3.620134  3.495430
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(C2E_2005[[1]], C2E_2015[[1]]),
  main = "C2E: Worry about future job",
  xlab = "C2E",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

C2F: Discouraged about future

# 2005 wave: column 20 (TA050067), missing values removed and only values
# up to 7 kept.
C2F_2005 <- TAS_original_data %>%
  select(20) %>%
  drop_na() %>%
  filter(TA050067 <= 7)
# 2015 wave: column 54 (TA150068), same treatment.
C2F_2015 <- TAS_original_data %>%
  select(54) %>%
  drop_na() %>%
  filter(TA150068 <= 7)
# Unpaired two-sample (Welch) t-test between the two waves.
C2F_t_test <- t.test(C2F_2005, C2F_2015, paired = FALSE)
print(C2F_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  C2F_2005 and C2F_2015
## t = -0.15541, df = 1395.6, p-value = 0.8765
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1703954  0.1453786
## sample estimates:
## mean of x mean of y 
##  3.074024  3.086533
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(C2F_2005[[1]], C2F_2015[[1]]),
  main = "C2F: Discouraged about future",
  xlab = "C2F",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

G30A: Likelihood of well-paying job

# 2005 wave: column 32 (TA050639), missing values removed and only positive
# values kept.
G30A_2005 <- TAS_original_data %>%
  select(32) %>%
  drop_na() %>%
  filter(TA050639 > 0)
# 2015 wave: column 66 (TA150784), missing values removed and only values
# strictly between 0 and 9 kept.
G30A_2015 <- TAS_original_data %>%
  select(66) %>%
  drop_na() %>%
  filter(TA150784 > 0, TA150784 < 9)
# Unpaired two-sample (Welch) t-test between the two waves.
G30A_t_test <- t.test(G30A_2005, G30A_2015, paired = FALSE)
print(G30A_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  G30A_2005 and G30A_2015
## t = -0.875, df = 1306.2, p-value = 0.3817
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.13542017  0.05188025
## sample estimates:
## mean of x mean of y 
##  5.998498  6.040268
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(G30A_2005[[1]], G30A_2015[[1]]),
  main = "G30A: Likelihood of well-paying job",
  xlab = "G30A",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

G41A: Importance of job status

# 2005 wave: column 33, rows with a missing value removed.
G41A_2005 <- TAS_original_data %>%
  select(33) %>%
  drop_na()
# 2015 wave: column 67 (TA150808), missing values removed and only values
# below 8 kept.
G41A_2015 <- TAS_original_data %>%
  select(67) %>%
  drop_na() %>%
  filter(TA150808 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
G41A_t_test <- t.test(G41A_2005, G41A_2015, paired = FALSE)
print(G41A_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  G41A_2005 and G41A_2015
## t = 7.3884, df = 1637.4, p-value = 2.357e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4074641 0.7019929
## sample estimates:
## mean of x mean of y 
##  5.363758  4.809030
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(G41A_2005[[1]], G41A_2015[[1]]),
  main = "G41A: Importance of job status",
  xlab = "G41A",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

G41B: Importance of decision-making

# 2005 wave: column 34, rows with a missing value removed.
G41B_2005 <- TAS_original_data %>%
  select(34) %>%
  drop_na()
# 2015 wave: column 68 (TA150809), missing values removed and only values
# below 8 kept.
G41B_2015 <- TAS_original_data %>%
  select(68) %>%
  drop_na() %>%
  filter(TA150809 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
G41B_t_test <- t.test(G41B_2005, G41B_2015, paired = FALSE)
print(G41B_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  G41B_2005 and G41B_2015
## t = 3.3848, df = 1688.7, p-value = 0.0007285
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0764623 0.2871741
## sample estimates:
## mean of x mean of y 
##  5.778523  5.596705
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(G41B_2005[[1]], G41B_2015[[1]]),
  main = "G41B: Importance of decision-making",
  xlab = "G41B",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

G41C: Importance of challenging work

# 2005 wave: column 35 (TA050665), rows with a missing value removed.
# This previously read column 34, the G41B item (TA050664), so the 2005
# sample here duplicated G41B's. The 2005 G41 items sit in columns 33-35,
# mirroring the 2015 items in columns 67-69. The echoed t-test output below
# this code was produced with the wrong column and will change on re-run.
G41C_2005 <- TAS_original_data %>%
  select(35) %>%
  drop_na()
# 2015 wave: column 69 (TA150810), missing values removed and only values
# strictly between 0 and 8 kept.
G41C_2015 <- TAS_original_data %>%
  select(69) %>%
  drop_na() %>%
  filter(TA150810 > 0, TA150810 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
G41C_t_test <- t.test(G41C_2005, G41C_2015, paired = FALSE)
print(G41C_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  G41C_2005 and G41C_2015
## t = 5.0926, df = 1716.1, p-value = 3.921e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1774609 0.3997793
## sample estimates:
## mean of x mean of y 
##  5.778523  5.489903
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(G41C_2005[[1]], G41C_2015[[1]]),
  main = "G41C: Importance of challenging work",
  xlab = "G41C",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

G41P: Importance of job central to identity

# 2005 wave: column 37 (TA050675), missing values removed and only values
# below 8 kept.
G41P_2005 <- TAS_original_data %>%
  select(37) %>%
  drop_na() %>%
  filter(TA050675 < 8)
# 2015 wave: column 71 (TA150820), same treatment.
G41P_2015 <- TAS_original_data %>%
  select(71) %>%
  drop_na() %>%
  filter(TA150820 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
G41P_t_test <- t.test(G41P_2005, G41P_2015, paired = FALSE)
print(G41P_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  G41P_2005 and G41P_2015
## t = 1.9078, df = 1537.8, p-value = 0.0566
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003760186  0.271061275
## sample estimates:
## mean of x mean of y 
##  5.070081  4.936430
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(G41P_2005[[1]], G41P_2015[[1]]),
  main = "G41P: Importance of job central to identity",
  xlab = "G41P",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)

H1: General Health

# 2005 wave: column 38 (TA050676), missing values removed and only values
# below 8 kept.
H1_2005 <- TAS_original_data %>%
  select(38) %>%
  drop_na() %>%
  filter(TA050676 < 8)
# 2015 wave: column 72 (TA150821), same treatment.
H1_2015 <- TAS_original_data %>%
  select(72) %>%
  drop_na() %>%
  filter(TA150821 < 8)
# Unpaired two-sample (Welch) t-test between the two waves.
H1_t_test <- t.test(H1_2005, H1_2015, paired = FALSE)
print(H1_t_test)
## 
##  Welch Two Sample t-test
## 
## data:  H1_2005 and H1_2015
## t = -2.435, df = 1455, p-value = 0.01501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.18169902 -0.01956443
## sample estimates:
## mean of x mean of y 
##  2.196501  2.297132
# Horizontal boxplots of the 2005 and 2015 responses, one box per wave.
# Each one-column tibble is reduced to its vector so the two groups are
# passed explicitly instead of via an NA-padded bind_rows() table.
# With horizontal = TRUE the response scale is on the x-axis and the waves
# are on the y-axis, so the y label is "Year" (nothing here is a count).
boxplot(
  list(H1_2005[[1]], H1_2015[[1]]),
  main = "H1: General Health",
  xlab = "H1",
  ylab = "Year",
  names = c("2005", "2015"),
  horizontal = TRUE
)