Global

#Global options
knitr::opts_chunk$set(echo = TRUE, 
                      message = FALSE, 
                      warning = FALSE, 
                      include = TRUE,
                      cache = FALSE)

knitr::opts_knit$set(progress = TRUE, verbose = TRUE)

# auto format (kable)
options(kableExtra.auto_format = FALSE)

#round

options("scipen"=100, "digits"=4)
#get file
load(file="C:/Users/luisf/Dropbox/Puc-Rio/Projeto - Junia/Base R - Pesquisa mapfre.RData")

Packages

library(tidyverse) #environment
library(psych) #psychometric analysis
library(summarytools) #descriptive

Data checking

Dataset <- janitor::clean_names(Dataset)

Dataset <- Dataset %>% 
  mutate(id = row_number())

Boxplot

plot_bdi <- Dataset %>% 
  select(starts_with("bdi")) %>% 
  pivot_longer(everything()) %>% 
  ggplot(., aes(x=name, y = value)) +
  geom_boxplot()
plot_bai <- Dataset %>% 
  select(starts_with("bai")) %>% 
  pivot_longer(everything()) %>% 
  ggplot(., aes(x=name, y = value)) +
  geom_boxplot()
gridExtra::grid.arrange(plot_bdi, plot_bai)

Missing

Dataset %>% 
  select(starts_with("bdi")) %>% 
  DataExplorer::plot_missing()

Dataset %>% 
  select(starts_with("bai")) %>% 
  DataExplorer::plot_missing()

Locating participants will all missing

Dataset %>% 
  select(id,country,starts_with("bdi")) %>%  #select all items
  mutate(number_na =  rowSums(is.na(.))) %>% #
  filter(number_na == Dataset %>% select(starts_with("bdi")) %>% ncol()) %>% #all missing
  count(country, id)
## # A tibble: 8 x 3
##   country     id     n
##   <fct>    <int> <int>
## 1 SPAIN        9     1
## 2 SPAIN       10     1
## 3 SPAIN      486     1
## 4 SPAIN      532     1
## 5 PORTUGAL  1217     1
## 6 PORTUGAL  1218     1
## 7 PORTUGAL  1441     1
## 8 BRAZIL    1795     1
Dataset %>% 
  select(id,country,starts_with("bai")) %>%  #select all items
  mutate(number_na =  rowSums(is.na(.))) %>% #
  filter(number_na == Dataset %>% select(starts_with("bai")) %>% ncol()) %>% #all missing
  count(country, id)
## # A tibble: 5 x 3
##   country     id     n
##   <fct>    <int> <int>
## 1 SPAIN        8     1
## 2 SPAIN        9     1
## 3 SPAIN       10     1
## 4 PORTUGAL  1217     1
## 5 PORTUGAL  1218     1

Comparing missing data

imputed_data_bdi <- Dataset %>% 
  select(starts_with("bdi"), -bdi_sum, -bdi_class) %>% 
  mice::mice(., m=5, maxit = 50, method = 'pmm', seed = 500)

summary(imputed_Data_bdi)

#get complete data ( 2nd out of 5)

complete_data_bdi <- mice::complete(imputed_data_bdi,2)

#check
DataExplorer::plot_missing(complete_data_bdi)

#add sums
complete_data_bdi <- complete_data_bdi %>% 
  mutate(bdi_sum = rowSums(select(., starts_with("bdi")), na.rm=T))

Comparing

comparable_results <- bind_rows(Dataset %>% 
                                 mutate(base = "original") %>% 
                                 select(base, bdi_sum),
                             
                               complete_data_bdi %>% 
                                 mutate(base = "imputada") %>% 
                                 select(base, bdi_sum))

# Check correlation
comparable_results %>% 
  unstack(bdi_sum~base) %>% 
  ggplot(., aes(x = original, y = imputada)) +
  geom_jitter() + 
  geom_smooth()

#check correlation
cor(Dataset$bdi_sum, complete_data_bdi$bdi_sum)

#check distributionn
ggplot(comparable_results, aes(x = base, y = bdi_sum)) +
  geom_boxplot()

wilcox.test(bdi_sum ~ base, data = comparable_results, paired = TRUE)

Composite scores

Depression

Add total scores

Dataset <- Dataset %>% 
  mutate(bdi_sum = rowSums(select(., starts_with("bdi")), na.rm=T)) #adjust for all missing)

If the participant left blank all items, then do not use

#Dataset  
Dataset <- Dataset %>% #get the dataset
  mutate(number_na_bdi = rowSums(is.na(select(.,starts_with("bdi"))))) %>% #create a variable to count missing
  mutate(bdi_sum = if_else(number_na_bdi == 21, NA_real_,bdi_sum)) #if all is missing

Add classification

Dataset <- Dataset %>% 
  mutate(bdi_class = case_when(
    bdi_sum <= 13 ~ "minima",
    bdi_sum < 20 ~ "leve",
    bdi_sum < 29 ~ "moderada",
    bdi_sum >= 29 ~ "grave"))

Dataset <- Dataset %>% 
  mutate(bdi_class = factor(bdi_class,
         levels=c("minima","leve","moderada","grave")))

Anxiety

Add total scores

Dataset <- Dataset %>% 
  mutate(bai_sum = rowSums(select(., starts_with("bai")), na.rm=T)) #adjust for all missing)

If the participant left blank all items, then do not use

#Dataset  
Dataset <- Dataset %>% #get the dataset
  mutate(number_na_bai = rowSums(is.na(select(.,starts_with("bai"))))) %>% #create a variable to count missing
  mutate(bai_sum = if_else(number_na_bai == 21, NA_real_,bai_sum)) #if all is missing

Add classification

Dataset <- Dataset %>% 
  mutate(bai_class = case_when(
    bai_sum <= 10 ~ "minima",
    bai_sum < 20 ~ "leve",
    bai_sum < 31 ~ "moderada",
    bai_sum >= 31 ~ "grave"))

Dataset <- Dataset %>% 
  mutate(bai_class = factor(bai_class,
         levels=c("minima","leve","moderada","grave")))

Cronbach’s alpha

Depression

Dataset %>% 
  select(starts_with("bdi"), -bdi_sum, -bdi_class) %>% 
  alpha()
## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.89       0.9     0.9      0.29 8.7 0.0034 0.44 0.37     0.29
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.89 0.9 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## bdi_1       0.89      0.89    0.89      0.29 8.1   0.0036 0.0049  0.29
## bdi_2       0.89      0.89    0.90      0.30 8.4   0.0035 0.0050  0.30
## bdi_3       0.89      0.89    0.90      0.29 8.2   0.0035 0.0049  0.29
## bdi_4       0.89      0.89    0.90      0.29 8.2   0.0036 0.0050  0.29
## bdi_5       0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_6       0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## bdi_7       0.89      0.89    0.89      0.29 8.1   0.0036 0.0047  0.29
## bdi_8       0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_9       0.89      0.90    0.90      0.30 8.6   0.0034 0.0045  0.30
## bdi_10      0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## bdi_11      0.89      0.89    0.90      0.30 8.4   0.0035 0.0052  0.30
## bdi_12      0.89      0.89    0.90      0.29 8.2   0.0036 0.0051  0.29
## bdi_13      0.89      0.89    0.90      0.29 8.2   0.0036 0.0051  0.29
## bdi_14      0.89      0.89    0.89      0.29 8.1   0.0036 0.0047  0.29
## bdi_15      0.89      0.89    0.89      0.29 8.1   0.0036 0.0046  0.29
## bdi_16      0.89      0.89    0.90      0.30 8.5   0.0035 0.0048  0.30
## bdi_17      0.89      0.89    0.90      0.29 8.2   0.0036 0.0053  0.29
## bdi_18      0.89      0.89    0.90      0.30 8.4   0.0035 0.0052  0.29
## bdi_19      0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_20      0.89      0.89    0.89      0.29 8.2   0.0036 0.0048  0.29
## bdi_21      0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop  mean   sd
## bdi_1  1946  0.64  0.66  0.65   0.60 0.248 0.53
## bdi_2  1947  0.52  0.52  0.48   0.45 0.543 0.69
## bdi_3  1949  0.58  0.60  0.57   0.53 0.298 0.57
## bdi_4  1947  0.62  0.62  0.60   0.56 0.375 0.59
## bdi_5  1946  0.56  0.56  0.53   0.50 0.488 0.61
## bdi_6  1946  0.48  0.50  0.46   0.43 0.162 0.51
## bdi_7  1945  0.66  0.65  0.64   0.60 0.386 0.70
## bdi_8  1949  0.59  0.58  0.55   0.52 0.790 0.72
## bdi_9  1947  0.40  0.44  0.40   0.36 0.072 0.32
## bdi_10 1947  0.52  0.51  0.47   0.45 0.342 0.69
## bdi_11 1941  0.54  0.53  0.49   0.47 0.544 0.69
## bdi_12 1943  0.60  0.61  0.58   0.54 0.433 0.62
## bdi_13 1943  0.61  0.60  0.57   0.54 0.464 0.79
## bdi_14 1941  0.64  0.64  0.62   0.59 0.307 0.67
## bdi_15 1939  0.66  0.65  0.64   0.60 0.565 0.69
## bdi_16 1933  0.53  0.51  0.48   0.45 0.855 0.78
## bdi_17 1938  0.60  0.60  0.57   0.54 0.434 0.64
## bdi_18 1942  0.55  0.54  0.50   0.48 0.535 0.71
## bdi_19 1942  0.59  0.57  0.54   0.52 0.756 0.80
## bdi_20 1942  0.63  0.62  0.60   0.57 0.527 0.69
## bdi_21 1939  0.48  0.51  0.47   0.44 0.136 0.45
## 
## Non missing response frequency for each item
##           0    1    2    3 miss
## bdi_1  0.79 0.17 0.03 0.01 0.01
## bdi_2  0.55 0.38 0.05 0.02 0.01
## bdi_3  0.75 0.20 0.04 0.01 0.00
## bdi_4  0.68 0.28 0.04 0.01 0.01
## bdi_5  0.56 0.40 0.03 0.01 0.01
## bdi_6  0.88 0.10 0.01 0.02 0.01
## bdi_7  0.72 0.20 0.06 0.02 0.01
## bdi_8  0.37 0.50 0.11 0.02 0.00
## bdi_9  0.94 0.05 0.00 0.01 0.01
## bdi_10 0.77 0.14 0.07 0.02 0.01
## bdi_11 0.55 0.39 0.04 0.02 0.01
## bdi_12 0.63 0.33 0.03 0.01 0.01
## bdi_13 0.67 0.24 0.04 0.05 0.01
## bdi_14 0.80 0.10 0.09 0.01 0.01
## bdi_15 0.54 0.37 0.08 0.01 0.01
## bdi_16 0.35 0.49 0.12 0.04 0.01
## bdi_17 0.64 0.31 0.04 0.01 0.01
## bdi_18 0.57 0.34 0.06 0.02 0.01
## bdi_19 0.46 0.34 0.18 0.02 0.01
## bdi_20 0.57 0.35 0.06 0.02 0.01
## bdi_21 0.90 0.08 0.02 0.01 0.01

Anxiety

Dataset %>% 
  select(starts_with("bai"), -bai_sum, -bai_class) %>% 
  alpha()
## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##        0.9       0.9    0.91      0.31 9.4 0.0032 0.41 0.39     0.31
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.9 0.91 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## bai_1       0.90       0.9    0.91      0.31 9.1   0.0033 0.0081  0.31
## bai_2       0.90       0.9    0.91      0.32 9.2   0.0032 0.0077  0.31
## bai_3       0.90       0.9    0.91      0.31 9.0   0.0033 0.0082  0.31
## bai_4       0.89       0.9    0.91      0.31 8.8   0.0034 0.0074  0.30
## bai_5       0.89       0.9    0.90      0.30 8.7   0.0034 0.0071  0.30
## bai_6       0.90       0.9    0.91      0.31 9.0   0.0033 0.0080  0.30
## bai_7       0.89       0.9    0.91      0.31 8.8   0.0034 0.0075  0.30
## bai_8       0.89       0.9    0.91      0.31 8.8   0.0033 0.0078  0.30
## bai_9       0.90       0.9    0.91      0.31 8.9   0.0033 0.0075  0.31
## bai_10      0.89       0.9    0.90      0.30 8.7   0.0034 0.0074  0.30
## bai_11      0.89       0.9    0.90      0.30 8.8   0.0034 0.0074  0.30
## bai_12      0.89       0.9    0.90      0.31 8.8   0.0033 0.0077  0.30
## bai_13      0.90       0.9    0.90      0.30 8.8   0.0033 0.0074  0.30
## bai_14      0.90       0.9    0.91      0.31 8.9   0.0033 0.0079  0.31
## bai_15      0.90       0.9    0.91      0.31 9.0   0.0033 0.0076  0.30
## bai_16      0.90       0.9    0.91      0.32 9.4   0.0032 0.0068  0.31
## bai_17      0.89       0.9    0.90      0.31 8.8   0.0034 0.0076  0.30
## bai_18      0.90       0.9    0.91      0.31 9.1   0.0033 0.0082  0.31
## bai_19      0.90       0.9    0.91      0.31 9.0   0.0033 0.0081  0.31
## bai_20      0.90       0.9    0.91      0.32 9.4   0.0031 0.0070  0.31
## bai_21      0.90       0.9    0.91      0.31 9.1   0.0033 0.0081  0.31
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## bai_1  1944  0.53  0.53  0.49   0.47 0.41 0.63
## bai_2  1946  0.49  0.48  0.44   0.42 0.69 0.75
## bai_3  1940  0.55  0.56  0.53   0.50 0.34 0.63
## bai_4  1941  0.66  0.63  0.62   0.59 0.89 0.89
## bai_5  1947  0.70  0.68  0.67   0.64 0.69 0.90
## bai_6  1949  0.57  0.58  0.55   0.52 0.28 0.61
## bai_7  1948  0.65  0.65  0.63   0.60 0.34 0.68
## bai_8  1948  0.63  0.63  0.61   0.58 0.36 0.65
## bai_9  1942  0.60  0.61  0.59   0.55 0.20 0.53
## bai_10 1946  0.70  0.67  0.66   0.63 1.04 0.88
## bai_11 1944  0.65  0.66  0.65   0.61 0.22 0.59
## bai_12 1946  0.63  0.64  0.63   0.58 0.30 0.63
## bai_13 1942  0.63  0.65  0.64   0.59 0.17 0.47
## bai_14 1947  0.58  0.59  0.56   0.53 0.24 0.59
## bai_15 1945  0.57  0.59  0.57   0.52 0.22 0.57
## bai_16 1947  0.41  0.41  0.36   0.34 0.18 0.56
## bai_17 1940  0.65  0.65  0.63   0.60 0.39 0.69
## bai_18 1944  0.56  0.55  0.51   0.49 0.55 0.77
## bai_19 1939  0.53  0.56  0.53   0.49 0.14 0.44
## bai_20 1940  0.43  0.43  0.38   0.36 0.50 0.71
## bai_21 1944  0.53  0.53  0.50   0.47 0.37 0.66
## 
## Non missing response frequency for each item
##           0    1    2    3 miss
## bai_1  0.67 0.26 0.07 0.00 0.01
## bai_2  0.47 0.38 0.14 0.01 0.01
## bai_3  0.74 0.19 0.07 0.01 0.01
## bai_4  0.41 0.35 0.19 0.05 0.01
## bai_5  0.56 0.24 0.15 0.05 0.01
## bai_6  0.80 0.13 0.06 0.01 0.00
## bai_7  0.76 0.16 0.07 0.01 0.00
## bai_8  0.73 0.20 0.06 0.01 0.00
## bai_9  0.85 0.10 0.04 0.01 0.01
## bai_10 0.30 0.42 0.21 0.07 0.01
## bai_11 0.85 0.09 0.05 0.01 0.01
## bai_12 0.78 0.15 0.06 0.01 0.01
## bai_13 0.87 0.10 0.03 0.00 0.01
## bai_14 0.83 0.12 0.04 0.01 0.01
## bai_15 0.85 0.10 0.04 0.01 0.01
## bai_16 0.89 0.06 0.03 0.02 0.01
## bai_17 0.71 0.20 0.07 0.01 0.01
## bai_18 0.60 0.27 0.10 0.02 0.01
## bai_19 0.89 0.08 0.03 0.00 0.01
## bai_20 0.61 0.29 0.09 0.01 0.01
## bai_21 0.72 0.21 0.06 0.01 0.01

Traditional analysis

Depression

mod_completo_bdi <- lm(bdi_sum ~ sex + country, Dataset)
apaTables::apa.aov.table(mod_completo_bdi)
## 
## 
## ANOVA results using bdi_sum as the dependent variable
##  
## 
##    Predictor        SS   df       MS      F    p partial_eta2
##  (Intercept)  35232.64    1 35232.64 595.18 .000             
##          sex    528.14    1   528.14   8.92 .003          .00
##      country   1179.55    2   589.77   9.96 .000          .01
##        Error 114723.93 1938    59.20                         
##  CI_90_partial_eta2
##                    
##          [.00, .01]
##          [.00, .02]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
#normality
olsrr::ols_test_normality(mod_completo_bdi)
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8825         0.0000 
## Kolmogorov-Smirnov        0.1111         0.0000 
## Cramer-von Mises         171.717         0.0000 
## Anderson-Darling         47.4803         0.0000 
## -----------------------------------------------
#equal variance
olsrr::ols_test_breusch_pagan(mod_completo_bdi)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##                Data                 
##  -----------------------------------
##  Response : bdi_sum 
##  Variables: fitted values of bdi_sum 
## 
##       Test Summary        
##  -------------------------
##  DF            =    1 
##  Chi2          =    2.5017 
##  Prob > Chi2   =    0.1137
#all plots
lindia::gg_diagnose(mod_completo_bdi)

Anxiety

mod_completo_bai <- lm(bai_sum ~ sex + country, Dataset)
apaTables::apa.aov.table(mod_completo_bai)
## 
## 
## ANOVA results using bai_sum as the dependent variable
##  
## 
##    Predictor        SS   df       MS      F    p partial_eta2
##  (Intercept)  23701.50    1 23701.50 372.46 .000             
##          sex   3248.35    1  3248.35  51.05 .000          .03
##      country    252.51    2   126.25   1.98 .138          .00
##        Error 123515.25 1941    63.63                         
##  CI_90_partial_eta2
##                    
##          [.02, .04]
##          [.00, .01]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
#normality
olsrr::ols_test_normality(mod_completo_bai)
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8549         0.0000 
## Kolmogorov-Smirnov        0.1427         0.0000 
## Cramer-von Mises         186.2753        0.0000 
## Anderson-Darling         72.9096         0.0000 
## -----------------------------------------------
#equal variance
olsrr::ols_test_breusch_pagan(mod_completo_bai)
## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##                Data                 
##  -----------------------------------
##  Response : bai_sum 
##  Variables: fitted values of bai_sum 
## 
##          Test Summary           
##  -------------------------------
##  DF            =    1 
##  Chi2          =    27.6053 
##  Prob > Chi2   =    0.0000001488
#all plots
lindia::gg_diagnose(mod_completo_bai)

Table 1

Country

Dataset %>% 
  freq(country)
## Frequencies  
## Dataset$country  
## Type: Factor  
## 
##                  Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## -------------- ------ --------- -------------- --------- --------------
##          SPAIN   1216     62.14          62.14     62.14          62.14
##       PORTUGAL    426     21.77          83.90     21.77          83.90
##         BRAZIL    315     16.10         100.00     16.10         100.00
##           <NA>      0                               0.00         100.00
##          Total   1957    100.00         100.00    100.00         100.00
Dataset %>% 
  group_by(country) %>% 
  freq(sex)
## Frequencies  
## Dataset$sex  
## Type: Factor  
## Group: country = SPAIN  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    384     31.76          31.76     31.58          31.58
##           F    825     68.24         100.00     67.85          99.42
##        <NA>      7                               0.58         100.00
##       Total   1216    100.00         100.00    100.00         100.00
## 
## Group: country = PORTUGAL  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    203     47.65          47.65     47.65          47.65
##           F    223     52.35         100.00     52.35         100.00
##        <NA>      0                               0.00         100.00
##       Total    426    100.00         100.00    100.00         100.00
## 
## Group: country = BRAZIL  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    149     47.30          47.30     47.30          47.30
##           F    166     52.70         100.00     52.70         100.00
##        <NA>      0                               0.00         100.00
##       Total    315    100.00         100.00    100.00         100.00
Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(sex))$p.value) 
## # A tibble: 3 x 2
##   country    pvalue
##   <fct>       <dbl>
## 1 SPAIN    7.34e-37
## 2 PORTUGAL 3.33e- 1
## 3 BRAZIL   3.38e- 1

Age

Dataset %>% 
  group_by(country) %>% 
  descr(age)
## Descriptive Statistics  
## age by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean     21.49      20.43    22.83
##           Std.Dev      3.02       1.66     7.18
##               Min     18.00      18.00    17.00
##                Q1     20.00      19.00    20.00
##            Median     21.00      20.00    21.00
##                Q3     23.00      22.00    23.00
##               Max     48.00      23.00    68.00
##               MAD      2.97       1.48     2.97
##               IQR      3.00       3.00     3.00
##                CV      0.14       0.08     0.31
##          Skewness      2.59       0.02     3.65
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis     13.68      -1.27    15.34
##           N.Valid   1210.00     426.00   293.00
##         Pct.Valid     99.51     100.00    93.02
Dataset %>% 
  group_by(sex) %>% 
  descr(age)
## Descriptive Statistics  
## age by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F      NA
## ----------------- -------- --------- -------
##              Mean    21.30     21.56   21.33
##           Std.Dev     3.31      4.11    4.16
##               Min    17.00     17.00   18.00
##                Q1    19.00     19.00   18.00
##            Median    21.00     21.00   20.00
##                Q3    22.00     22.00   26.00
##               Max    48.00     68.00   26.00
##               MAD     1.48      2.97    2.97
##               IQR     3.00      3.00    4.00
##                CV     0.16      0.19    0.20
##          Skewness     3.01      5.53    0.29
##       SE.Skewness     0.09      0.07    1.22
##          Kurtosis    14.69     45.70   -2.33
##           N.Valid   720.00   1206.00    3.00
##         Pct.Valid    97.83     99.34   42.86
Dataset %>% 
  group_by(sex, country) %>% 
  descr(age)
## Descriptive Statistics  
## age by sex  
## Data Frame: Dataset  
## N: 384  
## 
##                     sex = M, country = SPAIN   sex = M, country = PORTUGAL
## ----------------- -------------------------- -----------------------------
##              Mean                      21.43                         20.51
##           Std.Dev                       3.48                          1.64
##               Min                      18.00                         18.00
##                Q1                      19.00                         19.00
##            Median                      21.00                         21.00
##                Q3                      22.00                         22.00
##               Max                      48.00                         23.00
##               MAD                       2.97                          1.48
##               IQR                       3.00                          3.00
##                CV                       0.16                          0.08
##          Skewness                       2.85                         -0.06
##       SE.Skewness                       0.12                          0.17
##          Kurtosis                      14.11                         -1.23
##           N.Valid                     384.00                        203.00
##         Pct.Valid                     100.00                        100.00
## 
## Table: Table continues below
## 
##  
## 
##                     sex = M, country = BRAZIL   sex = F, country = SPAIN
## ----------------- --------------------------- --------------------------
##              Mean                       22.13                      21.52
##           Std.Dev                        4.31                       2.77
##               Min                       17.00                      18.00
##                Q1                       20.00                      20.00
##            Median                       21.00                      21.00
##                Q3                       23.00                      23.00
##               Max                       41.00                      43.00
##               MAD                        1.48                       2.97
##               IQR                        3.00                       3.00
##                CV                        0.19                       0.13
##          Skewness                        2.40                       2.29
##       SE.Skewness                        0.21                       0.09
##          Kurtosis                        6.45                      11.63
##           N.Valid                      133.00                     823.00
##         Pct.Valid                       89.26                      99.76
## 
## Table: Table continues below
## 
##  
## 
##                     sex = F, country = PORTUGAL   sex = F, country = BRAZIL
## ----------------- ----------------------------- ---------------------------
##              Mean                         20.37                       23.41
##           Std.Dev                          1.68                        8.87
##               Min                         18.00                       17.00
##                Q1                         19.00                       19.00
##            Median                         20.00                       21.00
##                Q3                         22.00                       23.00
##               Max                         23.00                       68.00
##               MAD                          1.48                        2.97
##               IQR                          3.00                        4.00
##                CV                          0.08                        0.38
##          Skewness                          0.09                        3.18
##       SE.Skewness                          0.16                        0.19
##          Kurtosis                         -1.30                       10.26
##           N.Valid                        223.00                      160.00
##         Pct.Valid                        100.00                       96.39
## 
## Table: Table continues below
## 
##  
## 
##                     sex = NA, country = SPAIN
## ----------------- ---------------------------
##              Mean                       21.33
##           Std.Dev                        4.16
##               Min                       18.00
##                Q1                       18.00
##            Median                       20.00
##                Q3                       26.00
##               Max                       26.00
##               MAD                        2.97
##               IQR                        4.00
##                CV                        0.20
##          Skewness                        0.29
##       SE.Skewness                        1.22
##          Kurtosis                       -2.33
##           N.Valid                        3.00
##         Pct.Valid                       42.86
##  Descriptive Statistics  
## age by country  
## Data Frame: Dataset  
## N: 384  
## 
##                     sex = M, country = SPAIN   sex = M, country = PORTUGAL
## ----------------- -------------------------- -----------------------------
##              Mean                      21.43                         20.51
##           Std.Dev                       3.48                          1.64
##               Min                      18.00                         18.00
##                Q1                      19.00                         19.00
##            Median                      21.00                         21.00
##                Q3                      22.00                         22.00
##               Max                      48.00                         23.00
##               MAD                       2.97                          1.48
##               IQR                       3.00                          3.00
##                CV                       0.16                          0.08
##          Skewness                       2.85                         -0.06
##       SE.Skewness                       0.12                          0.17
##          Kurtosis                      14.11                         -1.23
##           N.Valid                     384.00                        203.00
##         Pct.Valid                     100.00                        100.00
## 
## Table: Table continues below
## 
##  
## 
##                     sex = M, country = BRAZIL   sex = F, country = SPAIN
## ----------------- --------------------------- --------------------------
##              Mean                       22.13                      21.52
##           Std.Dev                        4.31                       2.77
##               Min                       17.00                      18.00
##                Q1                       20.00                      20.00
##            Median                       21.00                      21.00
##                Q3                       23.00                      23.00
##               Max                       41.00                      43.00
##               MAD                        1.48                       2.97
##               IQR                        3.00                       3.00
##                CV                        0.19                       0.13
##          Skewness                        2.40                       2.29
##       SE.Skewness                        0.21                       0.09
##          Kurtosis                        6.45                      11.63
##           N.Valid                      133.00                     823.00
##         Pct.Valid                       89.26                      99.76
## 
## Table: Table continues below
## 
##  
## 
##                     sex = F, country = PORTUGAL   sex = F, country = BRAZIL
## ----------------- ----------------------------- ---------------------------
##              Mean                         20.37                       23.41
##           Std.Dev                          1.68                        8.87
##               Min                         18.00                       17.00
##                Q1                         19.00                       19.00
##            Median                         20.00                       21.00
##                Q3                         22.00                       23.00
##               Max                         23.00                       68.00
##               MAD                          1.48                        2.97
##               IQR                          3.00                        4.00
##                CV                          0.08                        0.38
##          Skewness                          0.09                        3.18
##       SE.Skewness                          0.16                        0.19
##          Kurtosis                         -1.30                       10.26
##           N.Valid                        223.00                      160.00
##         Pct.Valid                        100.00                       96.39
## 
## Table: Table continues below
## 
##  
## 
##                     sex = NA, country = SPAIN
## ----------------- ---------------------------
##              Mean                       21.33
##           Std.Dev                        4.16
##               Min                       18.00
##                Q1                       18.00
##            Median                       20.00
##                Q3                       26.00
##               Max                       26.00
##               MAD                        2.97
##               IQR                        4.00
##                CV                        0.20
##          Skewness                        0.29
##       SE.Skewness                        1.22
##          Kurtosis                       -2.33
##           N.Valid                        3.00
##         Pct.Valid                       42.86
Dataset %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(t_test = map(data, ~t.test(.$age ~ .$sex)$p.value)) %>%  
  unnest(t_test)
## # A tibble: 3 x 3
## # Groups:   country [3]
##   country             data t_test
##   <fct>    <list<df[,93]>>  <dbl>
## 1 SPAIN       [1,216 × 93]  0.669
## 2 PORTUGAL      [426 × 93]  0.387
## 3 BRAZIL        [315 × 93]  0.109

This table was checked twice. Last checked on May 14 2020.

Table 2

Depression

By country

Dataset %>%
  group_by(country) %>% 
  descr(bdi_sum)
## Descriptive Statistics  
## bdi_sum by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean      8.86       9.05    10.89
##           Std.Dev      7.54       7.73     8.29
##               Min      0.00       0.00     0.00
##                Q1      3.00       4.00     5.00
##            Median      7.00       7.00     9.50
##                Q3     12.00      12.00    15.00
##               Max     57.00      53.00    41.00
##               MAD      5.93       5.93     8.15
##               IQR      9.00       8.00    10.00
##                CV      0.85       0.85     0.76
##          Skewness      1.77       1.76     1.01
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis      5.12       5.11     0.80
##           N.Valid   1212.00     423.00   314.00
##         Pct.Valid     99.67      99.30    99.68
kruskal.test(bdi_sum ~ country, data = Dataset)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  bdi_sum by country
## Kruskal-Wallis chi-squared = 18, df = 2, p-value = 0.0001
Dataset %>% 
  rstatix::kruskal_effsize(bdi_sum ~ country) #effect size
## # A tibble: 1 x 5
##   .y.         n effsize method  magnitude
## * <chr>   <int>   <dbl> <chr>   <ord>    
## 1 bdi_sum  1957 0.00802 eta2[H] small

Post hoc

PMCMR::posthoc.kruskal.nemenyi.test(x=Dataset$bdi_sum, g=Dataset$country, method="Tukey")
## 
##  Pairwise comparisons using Tukey and Kramer (Nemenyi) test  
##                    with Tukey-Dist approximation for independent samples 
## 
## data:  Dataset$bdi_sum and Dataset$country 
## 
##          SPAIN    PORTUGAL
## PORTUGAL 0.9351   -       
## BRAZIL   0.000098 0.0031  
## 
## P value adjustment method: none
Dataset %>% 
  rstatix::dunn_test(bdi_sum ~ country, p.adjust.method = "bonferroni") 
## # A tibble: 3 x 11
##   .y.   group1 group2    n1    n2 estimate statistic       p method   p.adj
## * <chr> <chr>  <chr>  <int> <int>    <dbl>     <dbl>   <dbl> <chr>    <dbl>
## 1 bdi_… SPAIN  PORTU…  1212   423     11.1     0.349 7.27e-1 Dunn … 1.00e+0
## 2 bdi_… SPAIN  BRAZIL  1212   314    148.      4.16  3.22e-5 Dunn … 9.67e-5
## 3 bdi_… PORTU… BRAZIL   423   314    137.      3.27  1.08e-3 Dunn … 3.24e-3
## # … with 1 more variable: p.adj.signif <chr>
dunn.test::dunn.test(Dataset$bdi_sum,Dataset$country,method = "Bonferroni")
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 17.675, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |     BRAZIL   PORTUGAL
## ---------+----------------------
## PORTUGAL |   3.269105
##          |    0.0016*
##          |
##    SPAIN |   4.157182   0.349355
##          |    0.0000*     1.0000
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
Dataset %>% 
  group_by(country) %>% 
  filter(!is.na(bdi_class)) %>% 
  count(bdi_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bdi_class),
              values_from = prop)
## # A tibble: 3 x 5
## # Groups:   country [3]
##   country  minima  leve moderada grave
##   <fct>     <dbl> <dbl>    <dbl> <dbl>
## 1 SPAIN      80.4  11.4     5.69  2.56
## 2 PORTUGAL   79.0  11.8     6.38  2.84
## 3 BRAZIL     68.8  16.2    10.8   4.14

Individual chi square for each country

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(bdi_class))$p.value) 
## # A tibble: 3 x 2
##   country     pvalue
##   <fct>        <dbl>
## 1 SPAIN    0.       
## 2 PORTUGAL 1.48e-143
## 3 BRAZIL   2.67e- 71

By sex

Dataset %>%
  group_by(sex) %>% 
  descr(bdi_sum)
## Descriptive Statistics  
## bdi_sum by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F       NA
## ----------------- -------- --------- --------
##              Mean     8.68      9.59     5.14
##           Std.Dev     7.74      7.73     5.46
##               Min     0.00      0.00     1.00
##                Q1     3.00      4.00     2.00
##            Median     7.00      8.00     3.00
##                Q3    12.00     13.00     9.00
##               Max    57.00     53.00    16.00
##               MAD     5.93      7.41     1.48
##               IQR     9.00      9.00     4.00
##                CV     0.89      0.81     1.06
##          Skewness     1.89      1.47     1.04
##       SE.Skewness     0.09      0.07     0.79
##          Kurtosis     5.63      3.29    -0.65
##           N.Valid   732.00   1210.00     7.00
##         Pct.Valid    99.46     99.67   100.00
wilcox.test(bdi_sum ~ sex, data = Dataset)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  bdi_sum by sex
## W = 404216, p-value = 0.001
## alternative hypothesis: true location shift is not equal to 0
Dataset %>% 
  filter(!is.na(sex)) %>% 
  {sjstats::mwu(.,bdi_sum, sex,distribution = "asymptotic")}
## 
## # Mann-Whitney-U-Test
## 
## Groups 1 = M (n = 732) | 2 = F (n = 1210):
##   U = 672494.000, W = 404216.000, p = 0.001, Z = -3.231
##   effect-size r =    0.073
##    rank-mean(1) =  918.71
##    rank-mean(2) = 1003.44
Dataset %>% 
  filter(!is.na(sex), !is.na(bdi_class)) %>% #<- filtering
  group_by(sex) %>% 
  count(bdi_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bdi_class),
              values_from = prop)
## # A tibble: 2 x 5
## # Groups:   sex [2]
##   sex   minima  leve moderada grave
##   <fct>  <dbl> <dbl>    <dbl> <dbl>
## 1 M       81.0  10.7     5.05  3.28
## 2 F       76.4  13.2     7.69  2.64

Individual chi square for each sex

Dataset %>% 
  filter(!is.na(sex)) %>% #<- filtering
  group_by(sex) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value) 
## # A tibble: 2 x 2
##   sex      pvalue
##   <fct>     <dbl>
## 1 M     5.09e-258
## 2 F     1.51e-252

Anxiety

Dataset %>%
  group_by(country) %>% 
  descr(bai_sum)
## Descriptive Statistics  
## bai_sum by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean      8.55       7.92     9.01
##           Std.Dev      8.06       8.04     8.40
##               Min      0.00       0.00     0.00
##                Q1      3.00       2.00     3.00
##            Median      6.00       5.00     6.00
##                Q3     12.00      11.00    12.00
##               Max     48.00      45.00    46.00
##               MAD      5.93       5.93     5.93
##               IQR      9.00       9.00     9.00
##                CV      0.94       1.02     0.93
##          Skewness      1.68       1.82     1.64
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis      3.38       3.87     3.31
##           N.Valid   1213.00     424.00   315.00
##         Pct.Valid     99.75      99.53   100.00
kruskal.test(bai_sum ~ country, data = Dataset)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  bai_sum by country
## Kruskal-Wallis chi-squared = 5.8, df = 2, p-value = 0.06
Dataset %>% 
  rstatix::kruskal_effsize(bai_sum ~ country) #effect size
## # A tibble: 1 x 5
##   .y.         n effsize method  magnitude
## * <chr>   <int>   <dbl> <chr>   <ord>    
## 1 bai_sum  1957 0.00192 eta2[H] small
Dataset %>% 
  group_by(country) %>%
  filter(!is.na(bai_class)) %>% 
  count(bai_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bai_class),
              values_from = prop)
## # A tibble: 3 x 5
## # Groups:   country [3]
##   country  minima  leve moderada grave
##   <fct>     <dbl> <dbl>    <dbl> <dbl>
## 1 SPAIN      71.2  19.0     7.17  2.64
## 2 PORTUGAL   72.6  18.4     5.90  3.07
## 3 BRAZIL     67.3  21.6     8.25  2.86

Individual chi square for each country

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value) 
## # A tibble: 3 x 2
##   country     pvalue
##   <fct>        <dbl>
## 1 SPAIN    2.04e-314
## 2 PORTUGAL 8.19e-116
## 3 BRAZIL   6.20e- 70

By sex

Dataset %>%
  group_by(sex) %>% 
  descr(bai_sum)
## Descriptive Statistics  
## bai_sum by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F       NA
## ----------------- -------- --------- --------
##              Mean     6.81      9.47    12.14
##           Std.Dev     7.14      8.45    15.07
##               Min     0.00      0.00     0.00
##                Q1     2.00      4.00     2.00
##            Median     5.00      7.00     9.00
##                Q3     9.00     13.00    14.00
##               Max    44.00     48.00    44.00
##               MAD     4.45      5.93     8.90
##               IQR     7.00      9.00    11.00
##                CV     1.05      0.89     1.24
##          Skewness     2.08      1.52     1.20
##       SE.Skewness     0.09      0.07     0.79
##          Kurtosis     5.51      2.70    -0.04
##           N.Valid   731.00   1214.00     7.00
##         Pct.Valid    99.32    100.00   100.00
wilcox.test(bai_sum ~ sex, data = Dataset)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  bai_sum by sex
## W = 348512, p-value = 0.000000000000002
## alternative hypothesis: true location shift is not equal to 0
Dataset %>% 
  filter(!is.na(sex)) %>% 
  {sjstats::mwu(.,bai_sum, sex,distribution = "asymptotic")}
## 
## # Mann-Whitney-U-Test
## 
## Groups 1 = M (n = 731) | 2 = F (n = 1214):
##   U = 616057.500, W = 348511.500, p < 0.001, Z = -7.951
##   effect-size r =    0.180
##    rank-mean(1) =  842.76
##    rank-mean(2) = 1051.42
Dataset %>% 
  filter(!is.na(sex), !is.na(bai_class)) %>% #<- filtering
  group_by(sex) %>% 
  count(bai_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bai_class),
              values_from = prop)
## # A tibble: 2 x 5
## # Groups:   sex [2]
##   sex   minima  leve moderada grave
##   <fct>  <dbl> <dbl>    <dbl> <dbl>
## 1 M       79.8  13.8     4.38  2.05
## 2 F       65.7  22.5     8.73  3.13

Individual chi square for each sex

Dataset %>% 
  filter(!is.na(sex)) %>% #<- filtering
  group_by(sex) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value) 
## # A tibble: 2 x 2
##   sex      pvalue
##   <fct>     <dbl>
## 1 M     5.09e-258
## 2 F     1.51e-252

Textual results

lm(age ~ country, Dataset) %>% 
  apaTables::apa.aov.table()
## 
## 
## ANOVA results using age as the dependent variable
##  
## 
##    Predictor        SS   df        MS        F    p partial_eta2
##  (Intercept) 558849.60    1 558849.60 39488.47 .000             
##      country    996.03    2    498.01    35.19 .000          .04
##        Error  27257.18 1926     14.15                           
##  CI_90_partial_eta2
##                    
##          [.02, .05]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
emmeans::emmeans(lm(age ~ country, Dataset), pairwise ~country, adj = "tukey")
## $emmeans
##  country  emmean    SE   df lower.CL upper.CL
##  SPAIN      21.5 0.108 1926     21.2     21.7
##  PORTUGAL   20.4 0.182 1926     20.0     20.9
##  BRAZIL     22.8 0.220 1926     22.3     23.4
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 3 estimates 
## 
## $contrasts
##  contrast          estimate    SE   df t.ratio p.value
##  SPAIN - PORTUGAL      1.06 0.212 1926  4.986  <.0001 
##  SPAIN - BRAZIL       -1.33 0.245 1926 -5.450  <.0001 
##  PORTUGAL - BRAZIL    -2.39 0.286 1926 -8.376  <.0001 
## 
## P value adjustment: tukey method for comparing a family of 3 estimates
Dataset %>% 
  filter(country == "SPAIN") %>% 
  {chisq.test(table(.$sex))}
## 
##  Chi-squared test for given probabilities
## 
## data:  table(.$sex)
## X-squared = 161, df = 1, p-value <0.0000000000000002
lm(bdi_sum ~ country, Dataset) %>% 
  residuals() %>% 
  qplot(.)

lm(bdi_sum ~ country, Dataset) %>% 
  residuals() %>% shapiro.test(.)
## 
##  Shapiro-Wilk normality test
## 
## data:  .
## W = 0.88, p-value <0.0000000000000002
lm(bai_sum ~ country, Dataset) %>% 
  residuals() %>% shapiro.test(.)
## 
##  Shapiro-Wilk normality test
## 
## data:  .
## W = 0.84, p-value <0.0000000000000002

Correlation

cor(Dataset$bdi_sum, Dataset$bai_sum, method = "spearman", use = "pairwise.complete.obs")
## [1] 0.5762