Global

#Global options
knitr::opts_chunk$set(echo = TRUE, 
                      message = FALSE, 
                      warning = FALSE, 
                      include = TRUE,
                      cache = FALSE)

knitr::opts_knit$set(progress = TRUE, verbose = TRUE)

# auto format (kable)
options(kableExtra.auto_format = FALSE)

#round

options("scipen"=100, "digits"=4)
#get file
load(file="C:/Users/luisf/Dropbox/Puc-Rio/Projeto - Junia/Base R - Pesquisa mapfre.RData")

Packages

library(tidyverse) #environment
library(psych) #psychometric analysis
library(summarytools) #descriptive

Data checking

Dataset <- janitor::clean_names(Dataset)

Dataset <- Dataset %>% 
  mutate(id = row_number())

Boxplot

plot_bdi <- Dataset %>% 
  select(starts_with("bdi")) %>% 
  pivot_longer(everything()) %>% 
  ggplot(., aes(x=name, y = value)) +
  geom_boxplot()

plot_bai <- Dataset %>% 
  select(starts_with("bai")) %>% 
  pivot_longer(everything()) %>% 
  ggplot(., aes(x=name, y = value)) +
  geom_boxplot()

gridExtra::grid.arrange(plot_bdi, plot_bai)

Missing

Dataset %>% 
  select(starts_with("bdi")) %>% 
  DataExplorer::plot_missing()

Dataset %>% 
  select(starts_with("bai")) %>% 
  DataExplorer::plot_missing()

Locating participants will all missing

Dataset %>% 
  select(id,country,starts_with("bdi")) %>%  #select all items
  mutate(number_na =  rowSums(is.na(.))) %>% #
  filter(number_na == Dataset %>% select(starts_with("bdi")) %>% ncol()) %>% #all missing
  count(country, id)

## # A tibble: 8 x 3
##   country     id     n
##   <fct>    <int> <int>
## 1 SPAIN        9     1
## 2 SPAIN       10     1
## 3 SPAIN      486     1
## 4 SPAIN      532     1
## 5 PORTUGAL  1217     1
## 6 PORTUGAL  1218     1
## 7 PORTUGAL  1441     1
## 8 BRAZIL    1795     1

Dataset %>% 
  select(id,country,starts_with("bai")) %>%  #select all items
  mutate(number_na =  rowSums(is.na(.))) %>% #
  filter(number_na == Dataset %>% select(starts_with("bai")) %>% ncol()) %>% #all missing
  count(country, id)

## # A tibble: 5 x 3
##   country     id     n
##   <fct>    <int> <int>
## 1 SPAIN        8     1
## 2 SPAIN        9     1
## 3 SPAIN       10     1
## 4 PORTUGAL  1217     1
## 5 PORTUGAL  1218     1

Comparing missing data

imputed_data_bdi <- Dataset %>% 
  select(starts_with("bdi"), -bdi_sum, -bdi_class) %>% 
  mice::mice(., m=5, maxit = 50, method = 'pmm', seed = 500)

summary(imputed_Data_bdi)

#get complete data ( 2nd out of 5)

complete_data_bdi <- mice::complete(imputed_data_bdi,2)

#check
DataExplorer::plot_missing(complete_data_bdi)

#add sums
complete_data_bdi <- complete_data_bdi %>% 
  mutate(bdi_sum = rowSums(select(., starts_with("bdi")), na.rm=T))

Comparing

comparable_results <- bind_rows(Dataset %>% 
                                 mutate(base = "original") %>% 
                                 select(base, bdi_sum),
                             
                               complete_data_bdi %>% 
                                 mutate(base = "imputada") %>% 
                                 select(base, bdi_sum))

# Check correlation
comparable_results %>% 
  unstack(bdi_sum~base) %>% 
  ggplot(., aes(x = original, y = imputada)) +
  geom_jitter() + 
  geom_smooth()

#check correlation
cor(Dataset$bdi_sum, complete_data_bdi$bdi_sum)

#check distributionn
ggplot(comparable_results, aes(x = base, y = bdi_sum)) +
  geom_boxplot()

wilcox.test(bdi_sum ~ base, data = comparable_results, paired = TRUE)

Composite scores

Depression

Add total scores

Dataset <- Dataset %>% 
  mutate(bdi_sum = rowSums(select(., starts_with("bdi")), na.rm=T)) #adjust for all missing)

If the participant left blank all items, then do not use

#Dataset  
Dataset <- Dataset %>% #get the dataset
  mutate(number_na_bdi = rowSums(is.na(select(.,starts_with("bdi"))))) %>% #create a variable to count missing
  mutate(bdi_sum = if_else(number_na_bdi == 21, NA_real_,bdi_sum)) #if all is missing

Add classification

Dataset <- Dataset %>% 
  mutate(bdi_class = case_when(
    bdi_sum <= 13 ~ "minima",
    bdi_sum < 20 ~ "leve",
    bdi_sum < 29 ~ "moderada",
    bdi_sum >= 29 ~ "grave"))

Dataset <- Dataset %>% 
  mutate(bdi_class = factor(bdi_class,
         levels=c("minima","leve","moderada","grave")))

Anxiety

Add total scores

Dataset <- Dataset %>% 
  mutate(bai_sum = rowSums(select(., starts_with("bai")), na.rm=T)) #adjust for all missing)

If the participant left blank all items, then do not use

#Dataset  
Dataset <- Dataset %>% #get the dataset
  mutate(number_na_bai = rowSums(is.na(select(.,starts_with("bai"))))) %>% #create a variable to count missing
  mutate(bai_sum = if_else(number_na_bai == 21, NA_real_,bai_sum)) #if all is missing

Add classification

Dataset <- Dataset %>% 
  mutate(bai_class = case_when(
    bai_sum <= 10 ~ "minima",
    bai_sum < 20 ~ "leve",
    bai_sum < 31 ~ "moderada",
    bai_sum >= 31 ~ "grave"))

Dataset <- Dataset %>% 
  mutate(bai_class = factor(bai_class,
         levels=c("minima","leve","moderada","grave")))

Cronbach’s alpha

Depression

Dataset %>% 
  select(starts_with("bdi"), -bdi_sum, -bdi_class) %>% 
  alpha()

## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.89       0.9     0.9      0.29 8.7 0.0034 0.44 0.37     0.29
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.89 0.9 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## bdi_1       0.89      0.89    0.89      0.29 8.1   0.0036 0.0049  0.29
## bdi_2       0.89      0.89    0.90      0.30 8.4   0.0035 0.0050  0.30
## bdi_3       0.89      0.89    0.90      0.29 8.2   0.0035 0.0049  0.29
## bdi_4       0.89      0.89    0.90      0.29 8.2   0.0036 0.0050  0.29
## bdi_5       0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_6       0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## bdi_7       0.89      0.89    0.89      0.29 8.1   0.0036 0.0047  0.29
## bdi_8       0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_9       0.89      0.90    0.90      0.30 8.6   0.0034 0.0045  0.30
## bdi_10      0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## bdi_11      0.89      0.89    0.90      0.30 8.4   0.0035 0.0052  0.30
## bdi_12      0.89      0.89    0.90      0.29 8.2   0.0036 0.0051  0.29
## bdi_13      0.89      0.89    0.90      0.29 8.2   0.0036 0.0051  0.29
## bdi_14      0.89      0.89    0.89      0.29 8.1   0.0036 0.0047  0.29
## bdi_15      0.89      0.89    0.89      0.29 8.1   0.0036 0.0046  0.29
## bdi_16      0.89      0.89    0.90      0.30 8.5   0.0035 0.0048  0.30
## bdi_17      0.89      0.89    0.90      0.29 8.2   0.0036 0.0053  0.29
## bdi_18      0.89      0.89    0.90      0.30 8.4   0.0035 0.0052  0.29
## bdi_19      0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_20      0.89      0.89    0.89      0.29 8.2   0.0036 0.0048  0.29
## bdi_21      0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop  mean   sd
## bdi_1  1946  0.64  0.66  0.65   0.60 0.248 0.53
## bdi_2  1947  0.52  0.52  0.48   0.45 0.543 0.69
## bdi_3  1949  0.58  0.60  0.57   0.53 0.298 0.57
## bdi_4  1947  0.62  0.62  0.60   0.56 0.375 0.59
## bdi_5  1946  0.56  0.56  0.53   0.50 0.488 0.61
## bdi_6  1946  0.48  0.50  0.46   0.43 0.162 0.51
## bdi_7  1945  0.66  0.65  0.64   0.60 0.386 0.70
## bdi_8  1949  0.59  0.58  0.55   0.52 0.790 0.72
## bdi_9  1947  0.40  0.44  0.40   0.36 0.072 0.32
## bdi_10 1947  0.52  0.51  0.47   0.45 0.342 0.69
## bdi_11 1941  0.54  0.53  0.49   0.47 0.544 0.69
## bdi_12 1943  0.60  0.61  0.58   0.54 0.433 0.62
## bdi_13 1943  0.61  0.60  0.57   0.54 0.464 0.79
## bdi_14 1941  0.64  0.64  0.62   0.59 0.307 0.67
## bdi_15 1939  0.66  0.65  0.64   0.60 0.565 0.69
## bdi_16 1933  0.53  0.51  0.48   0.45 0.855 0.78
## bdi_17 1938  0.60  0.60  0.57   0.54 0.434 0.64
## bdi_18 1942  0.55  0.54  0.50   0.48 0.535 0.71
## bdi_19 1942  0.59  0.57  0.54   0.52 0.756 0.80
## bdi_20 1942  0.63  0.62  0.60   0.57 0.527 0.69
## bdi_21 1939  0.48  0.51  0.47   0.44 0.136 0.45
## 
## Non missing response frequency for each item
##           0    1    2    3 miss
## bdi_1  0.79 0.17 0.03 0.01 0.01
## bdi_2  0.55 0.38 0.05 0.02 0.01
## bdi_3  0.75 0.20 0.04 0.01 0.00
## bdi_4  0.68 0.28 0.04 0.01 0.01
## bdi_5  0.56 0.40 0.03 0.01 0.01
## bdi_6  0.88 0.10 0.01 0.02 0.01
## bdi_7  0.72 0.20 0.06 0.02 0.01
## bdi_8  0.37 0.50 0.11 0.02 0.00
## bdi_9  0.94 0.05 0.00 0.01 0.01
## bdi_10 0.77 0.14 0.07 0.02 0.01
## bdi_11 0.55 0.39 0.04 0.02 0.01
## bdi_12 0.63 0.33 0.03 0.01 0.01
## bdi_13 0.67 0.24 0.04 0.05 0.01
## bdi_14 0.80 0.10 0.09 0.01 0.01
## bdi_15 0.54 0.37 0.08 0.01 0.01
## bdi_16 0.35 0.49 0.12 0.04 0.01
## bdi_17 0.64 0.31 0.04 0.01 0.01
## bdi_18 0.57 0.34 0.06 0.02 0.01
## bdi_19 0.46 0.34 0.18 0.02 0.01
## bdi_20 0.57 0.35 0.06 0.02 0.01
## bdi_21 0.90 0.08 0.02 0.01 0.01

Anxiety

Dataset %>% 
  select(starts_with("bai"), -bai_sum, -bai_class) %>% 
  alpha()

## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##        0.9       0.9    0.91      0.31 9.4 0.0032 0.41 0.39     0.31
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.9 0.91 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## bai_1       0.90       0.9    0.91      0.31 9.1   0.0033 0.0081  0.31
## bai_2       0.90       0.9    0.91      0.32 9.2   0.0032 0.0077  0.31
## bai_3       0.90       0.9    0.91      0.31 9.0   0.0033 0.0082  0.31
## bai_4       0.89       0.9    0.91      0.31 8.8   0.0034 0.0074  0.30
## bai_5       0.89       0.9    0.90      0.30 8.7   0.0034 0.0071  0.30
## bai_6       0.90       0.9    0.91      0.31 9.0   0.0033 0.0080  0.30
## bai_7       0.89       0.9    0.91      0.31 8.8   0.0034 0.0075  0.30
## bai_8       0.89       0.9    0.91      0.31 8.8   0.0033 0.0078  0.30
## bai_9       0.90       0.9    0.91      0.31 8.9   0.0033 0.0075  0.31
## bai_10      0.89       0.9    0.90      0.30 8.7   0.0034 0.0074  0.30
## bai_11      0.89       0.9    0.90      0.30 8.8   0.0034 0.0074  0.30
## bai_12      0.89       0.9    0.90      0.31 8.8   0.0033 0.0077  0.30
## bai_13      0.90       0.9    0.90      0.30 8.8   0.0033 0.0074  0.30
## bai_14      0.90       0.9    0.91      0.31 8.9   0.0033 0.0079  0.31
## bai_15      0.90       0.9    0.91      0.31 9.0   0.0033 0.0076  0.30
## bai_16      0.90       0.9    0.91      0.32 9.4   0.0032 0.0068  0.31
## bai_17      0.89       0.9    0.90      0.31 8.8   0.0034 0.0076  0.30
## bai_18      0.90       0.9    0.91      0.31 9.1   0.0033 0.0082  0.31
## bai_19      0.90       0.9    0.91      0.31 9.0   0.0033 0.0081  0.31
## bai_20      0.90       0.9    0.91      0.32 9.4   0.0031 0.0070  0.31
## bai_21      0.90       0.9    0.91      0.31 9.1   0.0033 0.0081  0.31
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## bai_1  1944  0.53  0.53  0.49   0.47 0.41 0.63
## bai_2  1946  0.49  0.48  0.44   0.42 0.69 0.75
## bai_3  1940  0.55  0.56  0.53   0.50 0.34 0.63
## bai_4  1941  0.66  0.63  0.62   0.59 0.89 0.89
## bai_5  1947  0.70  0.68  0.67   0.64 0.69 0.90
## bai_6  1949  0.57  0.58  0.55   0.52 0.28 0.61
## bai_7  1948  0.65  0.65  0.63   0.60 0.34 0.68
## bai_8  1948  0.63  0.63  0.61   0.58 0.36 0.65
## bai_9  1942  0.60  0.61  0.59   0.55 0.20 0.53
## bai_10 1946  0.70  0.67  0.66   0.63 1.04 0.88
## bai_11 1944  0.65  0.66  0.65   0.61 0.22 0.59
## bai_12 1946  0.63  0.64  0.63   0.58 0.30 0.63
## bai_13 1942  0.63  0.65  0.64   0.59 0.17 0.47
## bai_14 1947  0.58  0.59  0.56   0.53 0.24 0.59
## bai_15 1945  0.57  0.59  0.57   0.52 0.22 0.57
## bai_16 1947  0.41  0.41  0.36   0.34 0.18 0.56
## bai_17 1940  0.65  0.65  0.63   0.60 0.39 0.69
## bai_18 1944  0.56  0.55  0.51   0.49 0.55 0.77
## bai_19 1939  0.53  0.56  0.53   0.49 0.14 0.44
## bai_20 1940  0.43  0.43  0.38   0.36 0.50 0.71
## bai_21 1944  0.53  0.53  0.50   0.47 0.37 0.66
## 
## Non missing response frequency for each item
##           0    1    2    3 miss
## bai_1  0.67 0.26 0.07 0.00 0.01
## bai_2  0.47 0.38 0.14 0.01 0.01
## bai_3  0.74 0.19 0.07 0.01 0.01
## bai_4  0.41 0.35 0.19 0.05 0.01
## bai_5  0.56 0.24 0.15 0.05 0.01
## bai_6  0.80 0.13 0.06 0.01 0.00
## bai_7  0.76 0.16 0.07 0.01 0.00
## bai_8  0.73 0.20 0.06 0.01 0.00
## bai_9  0.85 0.10 0.04 0.01 0.01
## bai_10 0.30 0.42 0.21 0.07 0.01
## bai_11 0.85 0.09 0.05 0.01 0.01
## bai_12 0.78 0.15 0.06 0.01 0.01
## bai_13 0.87 0.10 0.03 0.00 0.01
## bai_14 0.83 0.12 0.04 0.01 0.01
## bai_15 0.85 0.10 0.04 0.01 0.01
## bai_16 0.89 0.06 0.03 0.02 0.01
## bai_17 0.71 0.20 0.07 0.01 0.01
## bai_18 0.60 0.27 0.10 0.02 0.01
## bai_19 0.89 0.08 0.03 0.00 0.01
## bai_20 0.61 0.29 0.09 0.01 0.01
## bai_21 0.72 0.21 0.06 0.01 0.01

Traditional analysis

Depression

mod_completo_bdi <- lm(bdi_sum ~ sex + country, Dataset)
apaTables::apa.aov.table(mod_completo_bdi)

## 
## 
## ANOVA results using bdi_sum as the dependent variable
##  
## 
##    Predictor        SS   df       MS      F    p partial_eta2
##  (Intercept)  35232.64    1 35232.64 595.18 .000             
##          sex    528.14    1   528.14   8.92 .003          .00
##      country   1179.55    2   589.77   9.96 .000          .01
##        Error 114723.93 1938    59.20                         
##  CI_90_partial_eta2
##                    
##          [.00, .01]
##          [.00, .02]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared

#normality
olsrr::ols_test_normality(mod_completo_bdi)

## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8825         0.0000 
## Kolmogorov-Smirnov        0.1111         0.0000 
## Cramer-von Mises         171.717         0.0000 
## Anderson-Darling         47.4803         0.0000 
## -----------------------------------------------

#equal variance
olsrr::ols_test_breusch_pagan(mod_completo_bdi)

## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##                Data                 
##  -----------------------------------
##  Response : bdi_sum 
##  Variables: fitted values of bdi_sum 
## 
##       Test Summary        
##  -------------------------
##  DF            =    1 
##  Chi2          =    2.5017 
##  Prob > Chi2   =    0.1137

#all plots
lindia::gg_diagnose(mod_completo_bdi)

Anxiety

mod_completo_bai <- lm(bai_sum ~ sex + country, Dataset)
apaTables::apa.aov.table(mod_completo_bai)

## 
## 
## ANOVA results using bai_sum as the dependent variable
##  
## 
##    Predictor        SS   df       MS      F    p partial_eta2
##  (Intercept)  23701.50    1 23701.50 372.46 .000             
##          sex   3248.35    1  3248.35  51.05 .000          .03
##      country    252.51    2   126.25   1.98 .138          .00
##        Error 123515.25 1941    63.63                         
##  CI_90_partial_eta2
##                    
##          [.02, .04]
##          [.00, .01]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared

#normality
olsrr::ols_test_normality(mod_completo_bai)

## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8549         0.0000 
## Kolmogorov-Smirnov        0.1427         0.0000 
## Cramer-von Mises         186.2753        0.0000 
## Anderson-Darling         72.9096         0.0000 
## -----------------------------------------------

#equal variance
olsrr::ols_test_breusch_pagan(mod_completo_bai)

## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##                Data                 
##  -----------------------------------
##  Response : bai_sum 
##  Variables: fitted values of bai_sum 
## 
##          Test Summary           
##  -------------------------------
##  DF            =    1 
##  Chi2          =    27.6053 
##  Prob > Chi2   =    0.0000001488

#all plots
lindia::gg_diagnose(mod_completo_bai)

Table 1

Country

Dataset %>% 
  freq(country)

## Frequencies  
## Dataset$country  
## Type: Factor  
## 
##                  Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## -------------- ------ --------- -------------- --------- --------------
##          SPAIN   1216     62.14          62.14     62.14          62.14
##       PORTUGAL    426     21.77          83.90     21.77          83.90
##         BRAZIL    315     16.10         100.00     16.10         100.00
##           <NA>      0                               0.00         100.00
##          Total   1957    100.00         100.00    100.00         100.00

Dataset %>% 
  group_by(country) %>% 
  freq(sex)

## Frequencies  
## Dataset$sex  
## Type: Factor  
## Group: country = SPAIN  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    384     31.76          31.76     31.58          31.58
##           F    825     68.24         100.00     67.85          99.42
##        <NA>      7                               0.58         100.00
##       Total   1216    100.00         100.00    100.00         100.00
## 
## Group: country = PORTUGAL  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    203     47.65          47.65     47.65          47.65
##           F    223     52.35         100.00     52.35         100.00
##        <NA>      0                               0.00         100.00
##       Total    426    100.00         100.00    100.00         100.00
## 
## Group: country = BRAZIL  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    149     47.30          47.30     47.30          47.30
##           F    166     52.70         100.00     52.70         100.00
##        <NA>      0                               0.00         100.00
##       Total    315    100.00         100.00    100.00         100.00

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(sex))$p.value)

## # A tibble: 3 x 2
##   country    pvalue
##   <fct>       <dbl>
## 1 SPAIN    7.34e-37
## 2 PORTUGAL 3.33e- 1
## 3 BRAZIL   3.38e- 1

Age

Dataset %>% 
  group_by(country) %>% 
  descr(age)

## Descriptive Statistics  
## age by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean     21.49      20.43    22.83
##           Std.Dev      3.02       1.66     7.18
##               Min     18.00      18.00    17.00
##                Q1     20.00      19.00    20.00
##            Median     21.00      20.00    21.00
##                Q3     23.00      22.00    23.00
##               Max     48.00      23.00    68.00
##               MAD      2.97       1.48     2.97
##               IQR      3.00       3.00     3.00
##                CV      0.14       0.08     0.31
##          Skewness      2.59       0.02     3.65
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis     13.68      -1.27    15.34
##           N.Valid   1210.00     426.00   293.00
##         Pct.Valid     99.51     100.00    93.02

Dataset %>% 
  group_by(sex) %>% 
  descr(age)

## Descriptive Statistics  
## age by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F      NA
## ----------------- -------- --------- -------
##              Mean    21.30     21.56   21.33
##           Std.Dev     3.31      4.11    4.16
##               Min    17.00     17.00   18.00
##                Q1    19.00     19.00   18.00
##            Median    21.00     21.00   20.00
##                Q3    22.00     22.00   26.00
##               Max    48.00     68.00   26.00
##               MAD     1.48      2.97    2.97
##               IQR     3.00      3.00    4.00
##                CV     0.16      0.19    0.20
##          Skewness     3.01      5.53    0.29
##       SE.Skewness     0.09      0.07    1.22
##          Kurtosis    14.69     45.70   -2.33
##           N.Valid   720.00   1206.00    3.00
##         Pct.Valid    97.83     99.34   42.86

Dataset %>% 
  group_by(sex, country) %>% 
  descr(age)

## Descriptive Statistics  
## age by sex  
## Data Frame: Dataset  
## N: 384  
## 
##                     sex = M, country = SPAIN   sex = M, country = PORTUGAL
## ----------------- -------------------------- -----------------------------
##              Mean                      21.43                         20.51
##           Std.Dev                       3.48                          1.64
##               Min                      18.00                         18.00
##                Q1                      19.00                         19.00
##            Median                      21.00                         21.00
##                Q3                      22.00                         22.00
##               Max                      48.00                         23.00
##               MAD                       2.97                          1.48
##               IQR                       3.00                          3.00
##                CV                       0.16                          0.08
##          Skewness                       2.85                         -0.06
##       SE.Skewness                       0.12                          0.17
##          Kurtosis                      14.11                         -1.23
##           N.Valid                     384.00                        203.00
##         Pct.Valid                     100.00                        100.00
## 
## Table: Table continues below
## 
##  
## 
##                     sex = M, country = BRAZIL   sex = F, country = SPAIN
## ----------------- --------------------------- --------------------------
##              Mean                       22.13                      21.52
##           Std.Dev                        4.31                       2.77
##               Min                       17.00                      18.00
##                Q1                       20.00                      20.00
##            Median                       21.00                      21.00
##                Q3                       23.00                      23.00
##               Max                       41.00                      43.00
##               MAD                        1.48                       2.97
##               IQR                        3.00                       3.00
##                CV                        0.19                       0.13
##          Skewness                        2.40                       2.29
##       SE.Skewness                        0.21                       0.09
##          Kurtosis                        6.45                      11.63
##           N.Valid                      133.00                     823.00
##         Pct.Valid                       89.26                      99.76
## 
## Table: Table continues below
## 
##  
## 
##                     sex = F, country = PORTUGAL   sex = F, country = BRAZIL
## ----------------- ----------------------------- ---------------------------
##              Mean                         20.37                       23.41
##           Std.Dev                          1.68                        8.87
##               Min                         18.00                       17.00
##                Q1                         19.00                       19.00
##            Median                         20.00                       21.00
##                Q3                         22.00                       23.00
##               Max                         23.00                       68.00
##               MAD                          1.48                        2.97
##               IQR                          3.00                        4.00
##                CV                          0.08                        0.38
##          Skewness                          0.09                        3.18
##       SE.Skewness                          0.16                        0.19
##          Kurtosis                         -1.30                       10.26
##           N.Valid                        223.00                      160.00
##         Pct.Valid                        100.00                       96.39
## 
## Table: Table continues below
## 
##  
## 
##                     sex = NA, country = SPAIN
## ----------------- ---------------------------
##              Mean                       21.33
##           Std.Dev                        4.16
##               Min                       18.00
##                Q1                       18.00
##            Median                       20.00
##                Q3                       26.00
##               Max                       26.00
##               MAD                        2.97
##               IQR                        4.00
##                CV                        0.20
##          Skewness                        0.29
##       SE.Skewness                        1.22
##          Kurtosis                       -2.33
##           N.Valid                        3.00
##         Pct.Valid                       42.86
##  Descriptive Statistics  
## age by country  
## Data Frame: Dataset  
## N: 384  
## 
##                     sex = M, country = SPAIN   sex = M, country = PORTUGAL
## ----------------- -------------------------- -----------------------------
##              Mean                      21.43                         20.51
##           Std.Dev                       3.48                          1.64
##               Min                      18.00                         18.00
##                Q1                      19.00                         19.00
##            Median                      21.00                         21.00
##                Q3                      22.00                         22.00
##               Max                      48.00                         23.00
##               MAD                       2.97                          1.48
##               IQR                       3.00                          3.00
##                CV                       0.16                          0.08
##          Skewness                       2.85                         -0.06
##       SE.Skewness                       0.12                          0.17
##          Kurtosis                      14.11                         -1.23
##           N.Valid                     384.00                        203.00
##         Pct.Valid                     100.00                        100.00
## 
## Table: Table continues below
## 
##  
## 
##                     sex = M, country = BRAZIL   sex = F, country = SPAIN
## ----------------- --------------------------- --------------------------
##              Mean                       22.13                      21.52
##           Std.Dev                        4.31                       2.77
##               Min                       17.00                      18.00
##                Q1                       20.00                      20.00
##            Median                       21.00                      21.00
##                Q3                       23.00                      23.00
##               Max                       41.00                      43.00
##               MAD                        1.48                       2.97
##               IQR                        3.00                       3.00
##                CV                        0.19                       0.13
##          Skewness                        2.40                       2.29
##       SE.Skewness                        0.21                       0.09
##          Kurtosis                        6.45                      11.63
##           N.Valid                      133.00                     823.00
##         Pct.Valid                       89.26                      99.76
## 
## Table: Table continues below
## 
##  
## 
##                     sex = F, country = PORTUGAL   sex = F, country = BRAZIL
## ----------------- ----------------------------- ---------------------------
##              Mean                         20.37                       23.41
##           Std.Dev                          1.68                        8.87
##               Min                         18.00                       17.00
##                Q1                         19.00                       19.00
##            Median                         20.00                       21.00
##                Q3                         22.00                       23.00
##               Max                         23.00                       68.00
##               MAD                          1.48                        2.97
##               IQR                          3.00                        4.00
##                CV                          0.08                        0.38
##          Skewness                          0.09                        3.18
##       SE.Skewness                          0.16                        0.19
##          Kurtosis                         -1.30                       10.26
##           N.Valid                        223.00                      160.00
##         Pct.Valid                        100.00                       96.39
## 
## Table: Table continues below
## 
##  
## 
##                     sex = NA, country = SPAIN
## ----------------- ---------------------------
##              Mean                       21.33
##           Std.Dev                        4.16
##               Min                       18.00
##                Q1                       18.00
##            Median                       20.00
##                Q3                       26.00
##               Max                       26.00
##               MAD                        2.97
##               IQR                        4.00
##                CV                        0.20
##          Skewness                        0.29
##       SE.Skewness                        1.22
##          Kurtosis                       -2.33
##           N.Valid                        3.00
##         Pct.Valid                       42.86

Dataset %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(t_test = map(data, ~t.test(.$age ~ .$sex)$p.value)) %>%  
  unnest(t_test)

## # A tibble: 3 x 3
## # Groups:   country [3]
##   country             data t_test
##   <fct>    <list<df[,93]>>  <dbl>
## 1 SPAIN       [1,216 × 93]  0.669
## 2 PORTUGAL      [426 × 93]  0.387
## 3 BRAZIL        [315 × 93]  0.109

This table was checked twice. Last checked on May 14 2020.

Table 2

Depression

By country

Dataset %>%
  group_by(country) %>% 
  descr(bdi_sum)

## Descriptive Statistics  
## bdi_sum by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean      8.86       9.05    10.89
##           Std.Dev      7.54       7.73     8.29
##               Min      0.00       0.00     0.00
##                Q1      3.00       4.00     5.00
##            Median      7.00       7.00     9.50
##                Q3     12.00      12.00    15.00
##               Max     57.00      53.00    41.00
##               MAD      5.93       5.93     8.15
##               IQR      9.00       8.00    10.00
##                CV      0.85       0.85     0.76
##          Skewness      1.77       1.76     1.01
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis      5.12       5.11     0.80
##           N.Valid   1212.00     423.00   314.00
##         Pct.Valid     99.67      99.30    99.68

kruskal.test(bdi_sum ~ country, data = Dataset)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  bdi_sum by country
## Kruskal-Wallis chi-squared = 18, df = 2, p-value = 0.0001

Dataset %>% 
  rstatix::kruskal_effsize(bdi_sum ~ country) #effect size

## # A tibble: 1 x 5
##   .y.         n effsize method  magnitude
## * <chr>   <int>   <dbl> <chr>   <ord>    
## 1 bdi_sum  1957 0.00802 eta2[H] small

Post hoc

PMCMR::posthoc.kruskal.nemenyi.test(x=Dataset$bdi_sum, g=Dataset$country, method="Tukey")

## 
##  Pairwise comparisons using Tukey and Kramer (Nemenyi) test  
##                    with Tukey-Dist approximation for independent samples 
## 
## data:  Dataset$bdi_sum and Dataset$country 
## 
##          SPAIN    PORTUGAL
## PORTUGAL 0.9351   -       
## BRAZIL   0.000098 0.0031  
## 
## P value adjustment method: none

Dataset %>% 
  rstatix::dunn_test(bdi_sum ~ country, p.adjust.method = "bonferroni")

## # A tibble: 3 x 11
##   .y.   group1 group2    n1    n2 estimate statistic       p method   p.adj
## * <chr> <chr>  <chr>  <int> <int>    <dbl>     <dbl>   <dbl> <chr>    <dbl>
## 1 bdi_… SPAIN  PORTU…  1212   423     11.1     0.349 7.27e-1 Dunn … 1.00e+0
## 2 bdi_… SPAIN  BRAZIL  1212   314    148.      4.16  3.22e-5 Dunn … 9.67e-5
## 3 bdi_… PORTU… BRAZIL   423   314    137.      3.27  1.08e-3 Dunn … 3.24e-3
## # … with 1 more variable: p.adj.signif <chr>

dunn.test::dunn.test(Dataset$bdi_sum,Dataset$country,method = "Bonferroni")

##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 17.675, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |     BRAZIL   PORTUGAL
## ---------+----------------------
## PORTUGAL |   3.269105
##          |    0.0016*
##          |
##    SPAIN |   4.157182   0.349355
##          |    0.0000*     1.0000
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

Dataset %>% 
  group_by(country) %>% 
  filter(!is.na(bdi_class)) %>% 
  count(bdi_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bdi_class),
              values_from = prop)

## # A tibble: 3 x 5
## # Groups:   country [3]
##   country  minima  leve moderada grave
##   <fct>     <dbl> <dbl>    <dbl> <dbl>
## 1 SPAIN      80.4  11.4     5.69  2.56
## 2 PORTUGAL   79.0  11.8     6.38  2.84
## 3 BRAZIL     68.8  16.2    10.8   4.14

Individual chi square for each country

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(bdi_class))$p.value)

## # A tibble: 3 x 2
##   country     pvalue
##   <fct>        <dbl>
## 1 SPAIN    0.       
## 2 PORTUGAL 1.48e-143
## 3 BRAZIL   2.67e- 71

By sex

Dataset %>%
  group_by(sex) %>% 
  descr(bdi_sum)

## Descriptive Statistics  
## bdi_sum by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F       NA
## ----------------- -------- --------- --------
##              Mean     8.68      9.59     5.14
##           Std.Dev     7.74      7.73     5.46
##               Min     0.00      0.00     1.00
##                Q1     3.00      4.00     2.00
##            Median     7.00      8.00     3.00
##                Q3    12.00     13.00     9.00
##               Max    57.00     53.00    16.00
##               MAD     5.93      7.41     1.48
##               IQR     9.00      9.00     4.00
##                CV     0.89      0.81     1.06
##          Skewness     1.89      1.47     1.04
##       SE.Skewness     0.09      0.07     0.79
##          Kurtosis     5.63      3.29    -0.65
##           N.Valid   732.00   1210.00     7.00
##         Pct.Valid    99.46     99.67   100.00

wilcox.test(bdi_sum ~ sex, data = Dataset)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  bdi_sum by sex
## W = 404216, p-value = 0.001
## alternative hypothesis: true location shift is not equal to 0

Dataset %>% 
  filter(!is.na(sex)) %>% 
  {sjstats::mwu(.,bdi_sum, sex,distribution = "asymptotic")}

## 
## # Mann-Whitney-U-Test
## 
## Groups 1 = M (n = 732) | 2 = F (n = 1210):
##   U = 672494.000, W = 404216.000, p = 0.001, Z = -3.231
##   effect-size r =    0.073
##    rank-mean(1) =  918.71
##    rank-mean(2) = 1003.44

Dataset %>% 
  filter(!is.na(sex), !is.na(bdi_class)) %>% #<- filtering
  group_by(sex) %>% 
  count(bdi_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bdi_class),
              values_from = prop)

## # A tibble: 2 x 5
## # Groups:   sex [2]
##   sex   minima  leve moderada grave
##   <fct>  <dbl> <dbl>    <dbl> <dbl>
## 1 M       81.0  10.7     5.05  3.28
## 2 F       76.4  13.2     7.69  2.64

Individual chi square for each sex

Dataset %>% 
  filter(!is.na(sex)) %>% #<- filtering
  group_by(sex) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value)

## # A tibble: 2 x 2
##   sex      pvalue
##   <fct>     <dbl>
## 1 M     5.09e-258
## 2 F     1.51e-252

Anxiety

Dataset %>%
  group_by(country) %>% 
  descr(bai_sum)

## Descriptive Statistics  
## bai_sum by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean      8.55       7.92     9.01
##           Std.Dev      8.06       8.04     8.40
##               Min      0.00       0.00     0.00
##                Q1      3.00       2.00     3.00
##            Median      6.00       5.00     6.00
##                Q3     12.00      11.00    12.00
##               Max     48.00      45.00    46.00
##               MAD      5.93       5.93     5.93
##               IQR      9.00       9.00     9.00
##                CV      0.94       1.02     0.93
##          Skewness      1.68       1.82     1.64
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis      3.38       3.87     3.31
##           N.Valid   1213.00     424.00   315.00
##         Pct.Valid     99.75      99.53   100.00

kruskal.test(bai_sum ~ country, data = Dataset)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  bai_sum by country
## Kruskal-Wallis chi-squared = 5.8, df = 2, p-value = 0.06

Dataset %>% 
  rstatix::kruskal_effsize(bai_sum ~ country) #effect size

## # A tibble: 1 x 5
##   .y.         n effsize method  magnitude
## * <chr>   <int>   <dbl> <chr>   <ord>    
## 1 bai_sum  1957 0.00192 eta2[H] small

Dataset %>% 
  group_by(country) %>%
  filter(!is.na(bai_class)) %>% 
  count(bai_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bai_class),
              values_from = prop)

## # A tibble: 3 x 5
## # Groups:   country [3]
##   country  minima  leve moderada grave
##   <fct>     <dbl> <dbl>    <dbl> <dbl>
## 1 SPAIN      71.2  19.0     7.17  2.64
## 2 PORTUGAL   72.6  18.4     5.90  3.07
## 3 BRAZIL     67.3  21.6     8.25  2.86

Individual chi square for each country

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value)

## # A tibble: 3 x 2
##   country     pvalue
##   <fct>        <dbl>
## 1 SPAIN    2.04e-314
## 2 PORTUGAL 8.19e-116
## 3 BRAZIL   6.20e- 70

By sex

Dataset %>%
  group_by(sex) %>% 
  descr(bai_sum)

## Descriptive Statistics  
## bai_sum by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F       NA
## ----------------- -------- --------- --------
##              Mean     6.81      9.47    12.14
##           Std.Dev     7.14      8.45    15.07
##               Min     0.00      0.00     0.00
##                Q1     2.00      4.00     2.00
##            Median     5.00      7.00     9.00
##                Q3     9.00     13.00    14.00
##               Max    44.00     48.00    44.00
##               MAD     4.45      5.93     8.90
##               IQR     7.00      9.00    11.00
##                CV     1.05      0.89     1.24
##          Skewness     2.08      1.52     1.20
##       SE.Skewness     0.09      0.07     0.79
##          Kurtosis     5.51      2.70    -0.04
##           N.Valid   731.00   1214.00     7.00
##         Pct.Valid    99.32    100.00   100.00

wilcox.test(bai_sum ~ sex, data = Dataset)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  bai_sum by sex
## W = 348512, p-value = 0.000000000000002
## alternative hypothesis: true location shift is not equal to 0

Dataset %>% 
  filter(!is.na(sex)) %>% 
  {sjstats::mwu(.,bai_sum, sex,distribution = "asymptotic")}

## 
## # Mann-Whitney-U-Test
## 
## Groups 1 = M (n = 731) | 2 = F (n = 1214):
##   U = 616057.500, W = 348511.500, p < 0.001, Z = -7.951
##   effect-size r =    0.180
##    rank-mean(1) =  842.76
##    rank-mean(2) = 1051.42

Dataset %>% 
  filter(!is.na(sex), !is.na(bai_class)) %>% #<- filtering
  group_by(sex) %>% 
  count(bai_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bai_class),
              values_from = prop)

## # A tibble: 2 x 5
## # Groups:   sex [2]
##   sex   minima  leve moderada grave
##   <fct>  <dbl> <dbl>    <dbl> <dbl>
## 1 M       79.8  13.8     4.38  2.05
## 2 F       65.7  22.5     8.73  3.13

Individual chi square for each sex

Dataset %>% 
  filter(!is.na(sex)) %>% #<- filtering
  group_by(sex) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value)

## # A tibble: 2 x 2
##   sex      pvalue
##   <fct>     <dbl>
## 1 M     5.09e-258
## 2 F     1.51e-252

Textual results

lm(age ~ country, Dataset) %>% 
  apaTables::apa.aov.table()

## 
## 
## ANOVA results using age as the dependent variable
##  
## 
##    Predictor        SS   df        MS        F    p partial_eta2
##  (Intercept) 558849.60    1 558849.60 39488.47 .000             
##      country    996.03    2    498.01    35.19 .000          .04
##        Error  27257.18 1926     14.15                           
##  CI_90_partial_eta2
##                    
##          [.02, .05]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared

emmeans::emmeans(lm(age ~ country, Dataset), pairwise ~country, adj = "tukey")

## $emmeans
##  country  emmean    SE   df lower.CL upper.CL
##  SPAIN      21.5 0.108 1926     21.2     21.7
##  PORTUGAL   20.4 0.182 1926     20.0     20.9
##  BRAZIL     22.8 0.220 1926     22.3     23.4
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $contrasts
##  contrast          estimate    SE   df t.ratio p.value
##  SPAIN - PORTUGAL      1.06 0.212 1926  4.986  <.0001 
##  SPAIN - BRAZIL       -1.33 0.245 1926 -5.450  <.0001 
##  PORTUGAL - BRAZIL    -2.39 0.286 1926 -8.376  <.0001 
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

Dataset %>% 
  filter(country == "SPAIN") %>% 
  {chisq.test(table(.$sex))}

## 
##  Chi-squared test for given probabilities
## 
## data:  table(.$sex)
## X-squared = 161, df = 1, p-value <0.0000000000000002

lm(bdi_sum ~ country, Dataset) %>% 
  residuals() %>% 
  qplot(.)

lm(bdi_sum ~ country, Dataset) %>% 
  residuals() %>% shapiro.test(.)

## 
##  Shapiro-Wilk normality test
## 
## data:  .
## W = 0.88, p-value <0.0000000000000002

lm(bai_sum ~ country, Dataset) %>% 
  residuals() %>% shapiro.test(.)

## 
##  Shapiro-Wilk normality test
## 
## data:  .
## W = 0.84, p-value <0.0000000000000002

Correlation

cor(Dataset$bdi_sum, Dataset$bai_sum, method = "spearman", use = "pairwise.complete.obs")

## [1] 0.5762

Analysis – Depression and anxiety symptoms in university students from Spain, Portugal and Brazil

Luis Anunciacao

May 13, 2020

Global

Packages

Data checking

Boxplot

Missing

Locating participants will all missing

Comparing missing data

Composite scores

Depression

Add total scores

Add classification

Anxiety

Add total scores

Add classification

Cronbach’s alpha

Depression

Anxiety

Traditional analysis

Depression

Anxiety

Table 1

Table 2

Depression

By country

Post hoc

By sex

Anxiety

By sex

Textual results