This markdown reports the analyses performed in the manuscript entitled “Depression and Anxiety Symptoms in a Representative Sample of Undergraduate Students in Spain, Portugal, and Brazil” (DOI: https://dx.doi.org/10.1590/0102.3772e36412). All files are available at https://osf.io/xpvbg/. To reproduce the results, load the Rdata file and run all chunks if ‘eval = FALSE’ is not present. I recommend going straight to line 280 to reproduce the published results.

In case you have any questions or suggestions, please feel free to contact me at luisfca@puc-rio.br

1 Global

#Global options
knitr::opts_chunk$set(echo = TRUE, 
                      message = FALSE, 
                      warning = FALSE, 
                      include = TRUE,
                      cache = FALSE)

knitr::opts_knit$set(progress = TRUE, verbose = TRUE)

# auto format (kable)
options(kableExtra.auto_format = FALSE)

#get file
load(file="C:/Users/luisf/Dropbox/Puc-Rio/Projeto - Junia/Base R - Pesquisa mapfre.RData")

2 Packages

library(tidyverse) #environment
library(psych) #psychometric analysis
library(summarytools) #descriptive

3 Data checking

This set of chunks is not necessary. They just present what I performed with the raw data.

Dataset <- janitor::clean_names(Dataset)

Dataset <- Dataset %>% 
  mutate(id = row_number())

3.1 Boxplot

BDI

plot_bdi <- Dataset %>% 
  select(starts_with("bdi")) %>% 
  pivot_longer(everything()) %>% 
  ggplot(., aes(x=name, y = value)) +
  geom_boxplot()

BAI

plot_bai <- Dataset %>% 
  select(starts_with("bai")) %>% 
  pivot_longer(everything()) %>% 
  ggplot(., aes(x=name, y = value)) +
  geom_boxplot()

Display plots

gridExtra::grid.arrange(plot_bdi, plot_bai)

3.2 Missing

BDI

Dataset %>% 
  select(starts_with("bdi")) %>% 
  DataExplorer::plot_missing()

BAI

Dataset %>% 
  select(starts_with("bai")) %>% 
  DataExplorer::plot_missing()

3.3 Locating participants will all missing

BDI

Dataset %>% 
  select(id,country,starts_with("bdi")) %>%  #select all items
  mutate(number_na =  rowSums(is.na(.))) %>% #
  filter(number_na == Dataset %>% select(starts_with("bdi")) %>% ncol()) %>% #all missing
  count(country, id)

##    country   id n
## 1    SPAIN    9 1
## 2    SPAIN   10 1
## 3    SPAIN  486 1
## 4    SPAIN  532 1
## 5 PORTUGAL 1217 1
## 6 PORTUGAL 1218 1
## 7 PORTUGAL 1441 1
## 8   BRAZIL 1795 1

BAI

Dataset %>% 
  select(id,country,starts_with("bai")) %>%  #select all items
  mutate(number_na =  rowSums(is.na(.))) %>% #
  filter(number_na == Dataset %>% select(starts_with("bai")) %>% ncol()) %>% #all missing
  count(country, id)

##    country   id n
## 1    SPAIN    8 1
## 2    SPAIN    9 1
## 3    SPAIN   10 1
## 4 PORTUGAL 1217 1
## 5 PORTUGAL 1218 1

3.4 Multiple imputation

Create a vector with the complete data.

imputed_data_bdi <- Dataset %>% 
  select(starts_with("bdi"), -bdi_sum, -bdi_class) %>% 
  mice::mice(., m=5, maxit = 50, method = 'pmm', seed = 500)

Just checking the results

summary(imputed_data_bdi)

##                 Length Class           Mode     
## data               21  data.frame      list     
## imp                21  -none-          list     
## m                   1  -none-          numeric  
## where           41097  -none-          logical  
## blocks             21  -none-          list     
## call                6  -none-          call     
## nmis               21  -none-          numeric  
## method             21  -none-          character
## predictorMatrix   441  -none-          numeric  
## visitSequence      21  -none-          character
## formulas           21  -none-          list     
## post               21  -none-          character
## blots              21  -none-          list     
## seed                1  -none-          numeric  
## iteration           1  -none-          numeric  
## lastSeedValue     626  -none-          numeric  
## chainMean        5250  -none-          numeric  
## chainVar         5250  -none-          numeric  
## loggedEvents        0  -none-          NULL     
## version             1  package_version list     
## date                1  Date            numeric

Get complete data ( 2nd out of 5)

complete_data_bdi <- mice::complete(imputed_data_bdi,2)

Double check with this new data is complete

DataExplorer::plot_missing(complete_data_bdi)

Add sums to this new data

complete_data_bdi <- complete_data_bdi %>% 
  mutate(bdi_sum = rowSums(select(., starts_with("bdi")), na.rm=T))

Now I’ll Compare the results obtained with the raw data (with missing cases) and the results obtained with the complete data.

comparable_results <- bind_rows(Dataset %>% 
                                 mutate(base = "original") %>% 
                                 select(base, bdi_sum),
                             
                               complete_data_bdi %>% 
                                 mutate(base = "imputada") %>% 
                                 select(base, bdi_sum))

The linear correlation will be the used as an index of the association of these two datasets.

comparable_results %>% 
  unstack(bdi_sum~base) %>% 
  ggplot(., aes(x = original, y = imputada)) +
  geom_jitter() + 
  geom_smooth()

The Correlaation coefficient is below.

cor(Dataset$bdi_sum, complete_data_bdi$bdi_sum, use = "complete.obs")

The distributionn of descriptive results is also needed.

ggplot(comparable_results, aes(x = base, y = bdi_sum)) +
  geom_boxplot()

Finally, the wilcox test will check if statistical differences are present. This test could be performed with pairwise options. However, as some cases are missing, I’ll just run the “independent” version of the wt.

wilcox.test(bdi_sum ~ base, data = comparable_results)

4 Composite scores

4.1 Depression

4.1.1 Add total scores

Dataset <- Dataset %>% 
  mutate(bdi_sum = rowSums(select(., starts_with("bdi")), na.rm=T)) #adjust for all missing)

If the participant left blank all items, do not use

#Dataset  
Dataset <- Dataset %>% #get the dataset
  mutate(number_na_bdi = rowSums(is.na(select(.,starts_with("bdi"))))) %>% #create a variable to count missing
  mutate(bdi_sum = if_else(number_na_bdi == 21, NA_real_,bdi_sum)) #if all is missing

4.1.2 Add classification

Dataset <- Dataset %>% 
  mutate(bdi_class = case_when(
    bdi_sum <= 13 ~ "minima",
    bdi_sum < 20 ~ "leve",
    bdi_sum < 29 ~ "moderada",
    bdi_sum >= 29 ~ "grave"))

Dataset <- Dataset %>% 
  mutate(bdi_class = factor(bdi_class,
         levels=c("minima","leve","moderada","grave")))

4.2 Anxiety

4.2.1 Add total scores

Dataset <- Dataset %>% 
  mutate(bai_sum = rowSums(select(., starts_with("bai")), na.rm=T)) #adjust for all missing)

If the participant left blank all items, do not use

#Dataset  
Dataset <- Dataset %>% #get the dataset
  mutate(number_na_bai = rowSums(is.na(select(.,starts_with("bai"))))) %>% #create a variable to count missing
  mutate(bai_sum = if_else(number_na_bai == 21, NA_real_,bai_sum)) #if all is missing

4.2.2 Add classification

Dataset <- Dataset %>% 
  mutate(bai_class = case_when(
    bai_sum <= 10 ~ "minima",
    bai_sum < 20 ~ "leve",
    bai_sum < 31 ~ "moderada",
    bai_sum >= 31 ~ "grave"))

Dataset <- Dataset %>% 
  mutate(bai_class = factor(bai_class,
         levels=c("minima","leve","moderada","grave")))

5 Cronbach’s alpha

In the reality, the manuscript starts here.

5.1 Depression

Dataset %>% 
  select(starts_with("bdi"), -bdi_sum, -bdi_class) %>% 
  alpha()

## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.89       0.9     0.9      0.29 8.7 0.0034 0.44 0.37     0.29
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.89 0.9 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## bdi_1       0.89      0.89    0.89      0.29 8.1   0.0036 0.0049  0.29
## bdi_2       0.89      0.89    0.90      0.30 8.4   0.0035 0.0050  0.30
## bdi_3       0.89      0.89    0.90      0.29 8.2   0.0035 0.0049  0.29
## bdi_4       0.89      0.89    0.90      0.29 8.2   0.0036 0.0050  0.29
## bdi_5       0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_6       0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## bdi_7       0.89      0.89    0.89      0.29 8.1   0.0036 0.0047  0.29
## bdi_8       0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_9       0.89      0.90    0.90      0.30 8.6   0.0034 0.0045  0.30
## bdi_10      0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## bdi_11      0.89      0.89    0.90      0.30 8.4   0.0035 0.0052  0.30
## bdi_12      0.89      0.89    0.90      0.29 8.2   0.0036 0.0051  0.29
## bdi_13      0.89      0.89    0.90      0.29 8.2   0.0036 0.0051  0.29
## bdi_14      0.89      0.89    0.89      0.29 8.1   0.0036 0.0047  0.29
## bdi_15      0.89      0.89    0.89      0.29 8.1   0.0036 0.0046  0.29
## bdi_16      0.89      0.89    0.90      0.30 8.5   0.0035 0.0048  0.30
## bdi_17      0.89      0.89    0.90      0.29 8.2   0.0036 0.0053  0.29
## bdi_18      0.89      0.89    0.90      0.30 8.4   0.0035 0.0052  0.29
## bdi_19      0.89      0.89    0.90      0.29 8.3   0.0035 0.0052  0.29
## bdi_20      0.89      0.89    0.89      0.29 8.2   0.0036 0.0048  0.29
## bdi_21      0.89      0.89    0.90      0.30 8.5   0.0035 0.0051  0.30
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop  mean   sd
## bdi_1  1946  0.64  0.66  0.65   0.60 0.248 0.53
## bdi_2  1947  0.52  0.52  0.48   0.45 0.543 0.69
## bdi_3  1949  0.58  0.60  0.57   0.53 0.298 0.57
## bdi_4  1947  0.62  0.62  0.60   0.56 0.375 0.59
## bdi_5  1946  0.56  0.56  0.53   0.50 0.488 0.61
## bdi_6  1946  0.48  0.50  0.46   0.43 0.162 0.51
## bdi_7  1945  0.66  0.65  0.64   0.60 0.386 0.70
## bdi_8  1949  0.59  0.58  0.55   0.52 0.790 0.72
## bdi_9  1947  0.40  0.44  0.40   0.36 0.072 0.32
## bdi_10 1947  0.52  0.51  0.47   0.45 0.342 0.69
## bdi_11 1941  0.54  0.53  0.49   0.47 0.544 0.69
## bdi_12 1943  0.60  0.61  0.58   0.54 0.433 0.62
## bdi_13 1943  0.61  0.60  0.57   0.54 0.464 0.79
## bdi_14 1941  0.64  0.64  0.62   0.59 0.307 0.67
## bdi_15 1939  0.66  0.65  0.64   0.60 0.565 0.69
## bdi_16 1933  0.53  0.51  0.48   0.45 0.855 0.78
## bdi_17 1938  0.60  0.60  0.57   0.54 0.434 0.64
## bdi_18 1942  0.55  0.54  0.50   0.48 0.535 0.71
## bdi_19 1942  0.59  0.57  0.54   0.52 0.756 0.80
## bdi_20 1942  0.63  0.62  0.60   0.57 0.527 0.69
## bdi_21 1939  0.48  0.51  0.47   0.44 0.136 0.45
## 
## Non missing response frequency for each item
##           0    1    2    3 miss
## bdi_1  0.79 0.17 0.03 0.01 0.01
## bdi_2  0.55 0.38 0.05 0.02 0.01
## bdi_3  0.75 0.20 0.04 0.01 0.00
## bdi_4  0.68 0.28 0.04 0.01 0.01
## bdi_5  0.56 0.40 0.03 0.01 0.01
## bdi_6  0.88 0.10 0.01 0.02 0.01
## bdi_7  0.72 0.20 0.06 0.02 0.01
## bdi_8  0.37 0.50 0.11 0.02 0.00
## bdi_9  0.94 0.05 0.00 0.01 0.01
## bdi_10 0.77 0.14 0.07 0.02 0.01
## bdi_11 0.55 0.39 0.04 0.02 0.01
## bdi_12 0.63 0.33 0.03 0.01 0.01
## bdi_13 0.67 0.24 0.04 0.05 0.01
## bdi_14 0.80 0.10 0.09 0.01 0.01
## bdi_15 0.54 0.37 0.08 0.01 0.01
## bdi_16 0.35 0.49 0.12 0.04 0.01
## bdi_17 0.64 0.31 0.04 0.01 0.01
## bdi_18 0.57 0.34 0.06 0.02 0.01
## bdi_19 0.46 0.34 0.18 0.02 0.01
## bdi_20 0.57 0.35 0.06 0.02 0.01
## bdi_21 0.90 0.08 0.02 0.01 0.01

5.2 Anxiety

Dataset %>% 
  select(starts_with("bai"), -bai_sum, -bai_class) %>% 
  alpha()

## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##        0.9       0.9    0.91      0.31 9.4 0.0032 0.41 0.39     0.31
## 
##  lower alpha upper     95% confidence boundaries
## 0.89 0.9 0.91 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## bai_1       0.90       0.9    0.91      0.31 9.1   0.0033 0.0081  0.31
## bai_2       0.90       0.9    0.91      0.32 9.2   0.0032 0.0077  0.31
## bai_3       0.90       0.9    0.91      0.31 9.0   0.0033 0.0082  0.31
## bai_4       0.89       0.9    0.91      0.31 8.8   0.0034 0.0074  0.30
## bai_5       0.89       0.9    0.90      0.30 8.7   0.0034 0.0071  0.30
## bai_6       0.90       0.9    0.91      0.31 9.0   0.0033 0.0080  0.30
## bai_7       0.89       0.9    0.91      0.31 8.8   0.0034 0.0075  0.30
## bai_8       0.89       0.9    0.91      0.31 8.8   0.0033 0.0078  0.30
## bai_9       0.90       0.9    0.91      0.31 8.9   0.0033 0.0075  0.31
## bai_10      0.89       0.9    0.90      0.30 8.7   0.0034 0.0074  0.30
## bai_11      0.89       0.9    0.90      0.30 8.8   0.0034 0.0074  0.30
## bai_12      0.89       0.9    0.90      0.31 8.8   0.0033 0.0077  0.30
## bai_13      0.90       0.9    0.90      0.30 8.8   0.0033 0.0074  0.30
## bai_14      0.90       0.9    0.91      0.31 8.9   0.0033 0.0079  0.31
## bai_15      0.90       0.9    0.91      0.31 9.0   0.0033 0.0076  0.30
## bai_16      0.90       0.9    0.91      0.32 9.4   0.0032 0.0068  0.31
## bai_17      0.89       0.9    0.90      0.31 8.8   0.0034 0.0076  0.30
## bai_18      0.90       0.9    0.91      0.31 9.1   0.0033 0.0082  0.31
## bai_19      0.90       0.9    0.91      0.31 9.0   0.0033 0.0081  0.31
## bai_20      0.90       0.9    0.91      0.32 9.4   0.0031 0.0070  0.31
## bai_21      0.90       0.9    0.91      0.31 9.1   0.0033 0.0081  0.31
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## bai_1  1944  0.53  0.53  0.49   0.47 0.41 0.63
## bai_2  1946  0.49  0.48  0.44   0.42 0.69 0.75
## bai_3  1940  0.55  0.56  0.53   0.50 0.34 0.63
## bai_4  1941  0.66  0.63  0.62   0.59 0.89 0.89
## bai_5  1947  0.70  0.68  0.67   0.64 0.69 0.90
## bai_6  1949  0.57  0.58  0.55   0.52 0.28 0.61
## bai_7  1948  0.65  0.65  0.63   0.60 0.34 0.68
## bai_8  1948  0.63  0.63  0.61   0.58 0.36 0.65
## bai_9  1942  0.60  0.61  0.59   0.55 0.20 0.53
## bai_10 1946  0.70  0.67  0.66   0.63 1.04 0.88
## bai_11 1944  0.65  0.66  0.65   0.61 0.22 0.59
## bai_12 1946  0.63  0.64  0.63   0.58 0.30 0.63
## bai_13 1942  0.63  0.65  0.64   0.59 0.17 0.47
## bai_14 1947  0.58  0.59  0.56   0.53 0.24 0.59
## bai_15 1945  0.57  0.59  0.57   0.52 0.22 0.57
## bai_16 1947  0.41  0.41  0.36   0.34 0.18 0.56
## bai_17 1940  0.65  0.65  0.63   0.60 0.39 0.69
## bai_18 1944  0.56  0.55  0.51   0.49 0.55 0.77
## bai_19 1939  0.53  0.56  0.53   0.49 0.14 0.44
## bai_20 1940  0.43  0.43  0.38   0.36 0.50 0.71
## bai_21 1944  0.53  0.53  0.50   0.47 0.37 0.66
## 
## Non missing response frequency for each item
##           0    1    2    3 miss
## bai_1  0.67 0.26 0.07 0.00 0.01
## bai_2  0.47 0.38 0.14 0.01 0.01
## bai_3  0.74 0.19 0.07 0.01 0.01
## bai_4  0.41 0.35 0.19 0.05 0.01
## bai_5  0.56 0.24 0.15 0.05 0.01
## bai_6  0.80 0.13 0.06 0.01 0.00
## bai_7  0.76 0.16 0.07 0.01 0.00
## bai_8  0.73 0.20 0.06 0.01 0.00
## bai_9  0.85 0.10 0.04 0.01 0.01
## bai_10 0.30 0.42 0.21 0.07 0.01
## bai_11 0.85 0.09 0.05 0.01 0.01
## bai_12 0.78 0.15 0.06 0.01 0.01
## bai_13 0.87 0.10 0.03 0.00 0.01
## bai_14 0.83 0.12 0.04 0.01 0.01
## bai_15 0.85 0.10 0.04 0.01 0.01
## bai_16 0.89 0.06 0.03 0.02 0.01
## bai_17 0.71 0.20 0.07 0.01 0.01
## bai_18 0.60 0.27 0.10 0.02 0.01
## bai_19 0.89 0.08 0.03 0.00 0.01
## bai_20 0.61 0.29 0.09 0.01 0.01
## bai_21 0.72 0.21 0.06 0.01 0.01

6 Table 1

Country

Dataset %>% 
  freq(country)

## Frequencies  
## Dataset$country  
## Type: Factor  
## 
##                  Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## -------------- ------ --------- -------------- --------- --------------
##          SPAIN   1216     62.14          62.14     62.14          62.14
##       PORTUGAL    426     21.77          83.90     21.77          83.90
##         BRAZIL    315     16.10         100.00     16.10         100.00
##           <NA>      0                               0.00         100.00
##          Total   1957    100.00         100.00    100.00         100.00

Check differences

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(sex))$p.value)

## # A tibble: 3 x 2
##   country    pvalue
##   <fct>       <dbl>
## 1 SPAIN    7.34e-37
## 2 PORTUGAL 3.33e- 1
## 3 BRAZIL   3.38e- 1

sex

Dataset %>% 
  group_by(country) %>% 
  freq(sex)

## Frequencies  
## Dataset$sex  
## Type: Factor  
## Group: country = SPAIN  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    384     31.76          31.76     31.58          31.58
##           F    825     68.24         100.00     67.85          99.42
##        <NA>      7                               0.58         100.00
##       Total   1216    100.00         100.00    100.00         100.00
## 
## Group: country = PORTUGAL  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    203     47.65          47.65     47.65          47.65
##           F    223     52.35         100.00     52.35         100.00
##        <NA>      0                               0.00         100.00
##       Total    426    100.00         100.00    100.00         100.00
## 
## Group: country = BRAZIL  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           M    149     47.30          47.30     47.30          47.30
##           F    166     52.70         100.00     52.70         100.00
##        <NA>      0                               0.00         100.00
##       Total    315    100.00         100.00    100.00         100.00

Age

Dataset %>% 
  group_by(country, sex) %>% 
  descr(age)

## Descriptive Statistics  
## Dataset$age  
## Group: country = SPAIN, sex = M  
## N: 384  
## 
##                        age
## ----------------- --------
##              Mean    21.43
##           Std.Dev     3.48
##               Min    18.00
##                Q1    19.00
##            Median    21.00
##                Q3    22.00
##               Max    48.00
##               MAD     2.97
##               IQR     3.00
##                CV     0.16
##          Skewness     2.85
##       SE.Skewness     0.12
##          Kurtosis    14.11
##           N.Valid   384.00
##         Pct.Valid   100.00
## 
## Group: country = SPAIN, sex = F  
## N: 825  
## 
##                        age
## ----------------- --------
##              Mean    21.52
##           Std.Dev     2.77
##               Min    18.00
##                Q1    20.00
##            Median    21.00
##                Q3    23.00
##               Max    43.00
##               MAD     2.97
##               IQR     3.00
##                CV     0.13
##          Skewness     2.29
##       SE.Skewness     0.09
##          Kurtosis    11.63
##           N.Valid   823.00
##         Pct.Valid    99.76
## 
## Group: country = SPAIN, sex = NA  
## N: 7  
## 
##                       age
## ----------------- -------
##              Mean   21.33
##           Std.Dev    4.16
##               Min   18.00
##                Q1   18.00
##            Median   20.00
##                Q3   26.00
##               Max   26.00
##               MAD    2.97
##               IQR    4.00
##                CV    0.20
##          Skewness    0.29
##       SE.Skewness    1.22
##          Kurtosis   -2.33
##           N.Valid    3.00
##         Pct.Valid   42.86
## 
## Group: country = PORTUGAL, sex = M  
## N: 203  
## 
##                        age
## ----------------- --------
##              Mean    20.51
##           Std.Dev     1.64
##               Min    18.00
##                Q1    19.00
##            Median    21.00
##                Q3    22.00
##               Max    23.00
##               MAD     1.48
##               IQR     3.00
##                CV     0.08
##          Skewness    -0.06
##       SE.Skewness     0.17
##          Kurtosis    -1.23
##           N.Valid   203.00
##         Pct.Valid   100.00
## 
## Group: country = PORTUGAL, sex = F  
## N: 223  
## 
##                        age
## ----------------- --------
##              Mean    20.37
##           Std.Dev     1.68
##               Min    18.00
##                Q1    19.00
##            Median    20.00
##                Q3    22.00
##               Max    23.00
##               MAD     1.48
##               IQR     3.00
##                CV     0.08
##          Skewness     0.09
##       SE.Skewness     0.16
##          Kurtosis    -1.30
##           N.Valid   223.00
##         Pct.Valid   100.00
## 
## Group: country = BRAZIL, sex = M  
## N: 149  
## 
##                        age
## ----------------- --------
##              Mean    22.13
##           Std.Dev     4.31
##               Min    17.00
##                Q1    20.00
##            Median    21.00
##                Q3    23.00
##               Max    41.00
##               MAD     1.48
##               IQR     3.00
##                CV     0.19
##          Skewness     2.40
##       SE.Skewness     0.21
##          Kurtosis     6.45
##           N.Valid   133.00
##         Pct.Valid    89.26
## 
## Group: country = BRAZIL, sex = F  
## N: 166  
## 
##                        age
## ----------------- --------
##              Mean    23.41
##           Std.Dev     8.87
##               Min    17.00
##                Q1    19.00
##            Median    21.00
##                Q3    23.00
##               Max    68.00
##               MAD     2.97
##               IQR     4.00
##                CV     0.38
##          Skewness     3.18
##       SE.Skewness     0.19
##          Kurtosis    10.26
##           N.Valid   160.00
##         Pct.Valid    96.39

Check differences

Dataset %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(t_test = map(data, ~t.test(.$age ~ .$sex)$p.value)) %>%  
  unnest(t_test)

## # A tibble: 3 x 3
## # Groups:   country [3]
##   country  data                  t_test
##   <fct>    <list>                 <dbl>
## 1 SPAIN    <tibble [1,216 x 93]>  0.669
## 2 PORTUGAL <tibble [426 x 93]>    0.387
## 3 BRAZIL   <tibble [315 x 93]>    0.109

This table was checked twice. Last checked on November 4 2020.

7 Table 2

The table in the manuscript presents some descriptive analysis and inferential results. However, it does not present a (formal or typical) regression analysis, but rather several univariate tests. These last tests were conducted after some dispute during the peer review. However, the results are parallel and they are presented below.

7.1 Depression

7.1.1 Total

Dataset %>% 
  descr(bdi_sum)

## Descriptive Statistics  
## Dataset$bdi_sum  
## N: 1957  
## 
##                     bdi_sum
## ----------------- ---------
##              Mean      9.23
##           Std.Dev      7.74
##               Min      0.00
##                Q1      4.00
##            Median      8.00
##                Q3     13.00
##               Max     57.00
##               MAD      5.93
##               IQR      9.00
##                CV      0.84
##          Skewness      1.62
##       SE.Skewness      0.06
##          Kurtosis      4.11
##           N.Valid   1949.00
##         Pct.Valid     99.59

7.1.2 By country

Dataset %>%
  group_by(country) %>% 
  descr(bdi_sum)

## Descriptive Statistics  
## bdi_sum by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean      8.86       9.05    10.89
##           Std.Dev      7.54       7.73     8.29
##               Min      0.00       0.00     0.00
##                Q1      3.00       4.00     5.00
##            Median      7.00       7.00     9.50
##                Q3     12.00      12.00    15.00
##               Max     57.00      53.00    41.00
##               MAD      5.93       5.93     8.15
##               IQR      9.00       8.00    10.00
##                CV      0.85       0.85     0.76
##          Skewness      1.77       1.76     1.01
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis      5.12       5.11     0.80
##           N.Valid   1212.00     423.00   314.00
##         Pct.Valid     99.67      99.30    99.68

Percentages

Dataset %>% 
  group_by(country) %>% 
  filter(!is.na(bdi_class)) %>% 
  count(bdi_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bdi_class),
              values_from = prop)

## # A tibble: 3 x 5
## # Groups:   country [3]
##   country  minima  leve moderada grave
##   <fct>     <dbl> <dbl>    <dbl> <dbl>
## 1 SPAIN      80.4  11.4     5.69  2.56
## 2 PORTUGAL   79.0  11.8     6.38  2.84
## 3 BRAZIL     68.8  16.2    10.8   4.14

Individual chi square for each country

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(bdi_class))$p.value)

## # A tibble: 3 x 2
##   country     pvalue
##   <fct>        <dbl>
## 1 SPAIN    0.       
## 2 PORTUGAL 1.48e-143
## 3 BRAZIL   2.67e- 71

7.1.3 By sex

Dataset %>%
  group_by(sex) %>% 
  descr(bdi_sum)

## Descriptive Statistics  
## bdi_sum by sex  
## Data Frame: Dataset  
## N: 736  
## 
##                          M         F       NA
## ----------------- -------- --------- --------
##              Mean     8.68      9.59     5.14
##           Std.Dev     7.74      7.73     5.46
##               Min     0.00      0.00     1.00
##                Q1     3.00      4.00     2.00
##            Median     7.00      8.00     3.00
##                Q3    12.00     13.00     9.00
##               Max    57.00     53.00    16.00
##               MAD     5.93      7.41     1.48
##               IQR     9.00      9.00     4.00
##                CV     0.89      0.81     1.06
##          Skewness     1.89      1.47     1.04
##       SE.Skewness     0.09      0.07     0.79
##          Kurtosis     5.63      3.29    -0.65
##           N.Valid   732.00   1210.00     7.00
##         Pct.Valid    99.46     99.67   100.00

Percentages

Dataset %>% 
  filter(!is.na(sex), !is.na(bdi_class)) %>% #<- filtering
  group_by(sex) %>% 
  count(bdi_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bdi_class),
              values_from = prop)

## # A tibble: 2 x 5
## # Groups:   sex [2]
##   sex   minima  leve moderada grave
##   <fct>  <dbl> <dbl>    <dbl> <dbl>
## 1 M       81.0  10.7     5.05  3.28
## 2 F       76.4  13.2     7.69  2.64

Individual chi square for each sex

Dataset %>% 
  filter(!is.na(sex)) %>% #<- filtering
  group_by(sex) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value)

## # A tibble: 2 x 2
##   sex      pvalue
##   <fct>     <dbl>
## 1 M     5.09e-258
## 2 F     1.51e-252

7.1.4 Regression analysis

Regress the results of depression (BDI) on sex and country.

mod_completo_bdi <- lm(bdi_sum ~ sex + country, Dataset)
apaTables::apa.aov.table(mod_completo_bdi)

## 
## 
## ANOVA results using bdi_sum as the dependent variable
##  
## 
##    Predictor        SS   df       MS      F    p partial_eta2
##  (Intercept)  35232.64    1 35232.64 595.18 .000             
##          sex    528.14    1   528.14   8.92 .003          .00
##      country   1179.55    2   589.77   9.96 .000          .01
##        Error 114723.93 1938    59.20                         
##  CI_90_partial_eta2
##                    
##          [.00, .01]
##          [.00, .02]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared

Check the assumptions

#normality of the residuals
olsrr::ols_test_normality(mod_completo_bdi)

## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8825         0.0000 
## Kolmogorov-Smirnov        0.1111         0.0000 
## Cramer-von Mises         171.717         0.0000 
## Anderson-Darling         47.4803         0.0000 
## -----------------------------------------------

#equal variance of the residuals
olsrr::ols_test_breusch_pagan(mod_completo_bdi)

## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##                Data                 
##  -----------------------------------
##  Response : bdi_sum 
##  Variables: fitted values of bdi_sum 
## 
##         Test Summary         
##  ----------------------------
##  DF            =    1 
##  Chi2          =    2.501723 
##  Prob > Chi2   =    0.1137219

#all plots
lindia::gg_diagnose(mod_completo_bdi)

7.2 Anxiety

7.2.1 Total

Dataset %>% 
  descr(bai_sum)

## Descriptive Statistics  
## Dataset$bai_sum  
## N: 1957  
## 
##                     bai_sum
## ----------------- ---------
##              Mean      8.48
##           Std.Dev      8.11
##               Min      0.00
##                Q1      3.00
##            Median      6.00
##                Q3     12.00
##               Max     48.00
##               MAD      5.93
##               IQR      9.00
##                CV      0.96
##          Skewness      1.71
##       SE.Skewness      0.06
##          Kurtosis      3.48
##           N.Valid   1952.00
##         Pct.Valid     99.74

7.2.2 By country

Dataset %>% 
  group_by(country) %>% 
  descr(bai_sum)

## Descriptive Statistics  
## bai_sum by country  
## Data Frame: Dataset  
## N: 1216  
## 
##                       SPAIN   PORTUGAL   BRAZIL
## ----------------- --------- ---------- --------
##              Mean      8.55       7.92     9.01
##           Std.Dev      8.06       8.04     8.40
##               Min      0.00       0.00     0.00
##                Q1      3.00       2.00     3.00
##            Median      6.00       5.00     6.00
##                Q3     12.00      11.00    12.00
##               Max     48.00      45.00    46.00
##               MAD      5.93       5.93     5.93
##               IQR      9.00       9.00     9.00
##                CV      0.94       1.02     0.93
##          Skewness      1.68       1.82     1.64
##       SE.Skewness      0.07       0.12     0.14
##          Kurtosis      3.38       3.87     3.31
##           N.Valid   1213.00     424.00   315.00
##         Pct.Valid     99.75      99.53   100.00

Percentages

Dataset %>% 
  filter(!is.na(sex), !is.na(bai_class)) %>% #<- filtering
  group_by(sex) %>% 
  count(bai_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bai_class),
              values_from = prop)

## # A tibble: 2 x 5
## # Groups:   sex [2]
##   sex   minima  leve moderada grave
##   <fct>  <dbl> <dbl>    <dbl> <dbl>
## 1 M       79.8  13.8     4.38  2.05
## 2 F       65.7  22.5     8.73  3.13

7.2.3 By sex

Dataset %>% 
  filter(!is.na(sex)) %>% #<- filtering
  group_by(sex) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value)

## # A tibble: 2 x 2
##   sex      pvalue
##   <fct>     <dbl>
## 1 M     5.09e-258
## 2 F     1.51e-252

7.2.4 Regression analysis

mod_completo_bai <- lm(bai_sum ~ sex + country, Dataset)
apaTables::apa.aov.table(mod_completo_bai)

## 
## 
## ANOVA results using bai_sum as the dependent variable
##  
## 
##    Predictor        SS   df       MS      F    p partial_eta2
##  (Intercept)  23701.50    1 23701.50 372.46 .000             
##          sex   3248.35    1  3248.35  51.05 .000          .03
##      country    252.51    2   126.25   1.98 .138          .00
##        Error 123515.25 1941    63.63                         
##  CI_90_partial_eta2
##                    
##          [.02, .04]
##          [.00, .01]
##                    
## 
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared

Assumptions

#normality
olsrr::ols_test_normality(mod_completo_bai)

## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.8549         0.0000 
## Kolmogorov-Smirnov        0.1427         0.0000 
## Cramer-von Mises         186.2753        0.0000 
## Anderson-Darling         72.9096         0.0000 
## -----------------------------------------------

#equal variance
olsrr::ols_test_breusch_pagan(mod_completo_bai)

## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##                Data                 
##  -----------------------------------
##  Response : bai_sum 
##  Variables: fitted values of bai_sum 
## 
##          Test Summary           
##  -------------------------------
##  DF            =    1 
##  Chi2          =    27.60531 
##  Prob > Chi2   =    1.487695e-07

#all plots
lindia::gg_diagnose(mod_completo_bai)

Assumptions were violated and univariate analyses were suggested.

7.3 Depression (country, non-parametric)

Kruskall-Walis is a non-parametric procedure, which is similar to perfoming a one-way ANOVA on ranks.

kruskal.test(bdi_sum ~ country, data = Dataset)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  bdi_sum by country
## Kruskal-Wallis chi-squared = 17.675, df = 2, p-value = 0.0001452

The results suggest a statistically significant difference. The effect size is compute by the code below.

Dataset %>% 
  rstatix::kruskal_effsize(bdi_sum ~ country) #effect size

## # A tibble: 1 x 5
##   .y.         n effsize method  magnitude
## * <chr>   <int>   <dbl> <chr>   <ord>    
## 1 bdi_sum  1957 0.00802 eta2[H] small

7.4 Post hoc

Pairwise Test for Multiple Comparisons of Mean Rank Sums (Nemenyi-Tests).

PMCMR::posthoc.kruskal.nemenyi.test(x=Dataset$bdi_sum, g=Dataset$country, method="Tukey")

## 
##  Pairwise comparisons using Tukey and Kramer (Nemenyi) test  
##                    with Tukey-Dist approximation for independent samples 
## 
## data:  Dataset$bdi_sum and Dataset$country 
## 
##          SPAIN   PORTUGAL
## PORTUGAL 0.9351  -       
## BRAZIL   9.8e-05 0.0031  
## 
## P value adjustment method: none

And Dunn test.

Dataset %>% 
  rstatix::dunn_test(bdi_sum ~ country, p.adjust.method = "bonferroni")

## # A tibble: 3 x 9
##   .y.     group1   group2      n1    n2 statistic        p    p.adj p.adj.signif
## * <chr>   <chr>    <chr>    <int> <int>     <dbl>    <dbl>    <dbl> <chr>       
## 1 bdi_sum SPAIN    PORTUGAL  1212   423     0.349  7.27e-1  1.00e+0 ns          
## 2 bdi_sum SPAIN    BRAZIL    1212   314     4.16   3.22e-5  9.67e-5 ****        
## 3 bdi_sum PORTUGAL BRAZIL     423   314     3.27   1.08e-3  3.24e-3 **

dunn.test::dunn.test(Dataset$bdi_sum,Dataset$country,method = "Bonferroni")

##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 17.675, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |     BRAZIL   PORTUGAL
## ---------+----------------------
## PORTUGAL |   3.269105
##          |    0.0016*
##          |
##    SPAIN |   4.157182   0.349355
##          |    0.0000*     1.0000
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

7.5 Depression (sex, non-parametric)

wilcox.test(bdi_sum ~ sex, data = Dataset)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  bdi_sum by sex
## W = 404216, p-value = 0.001232
## alternative hypothesis: true location shift is not equal to 0

Dataset %>% 
  filter(!is.na(sex)) %>% 
  {sjstats::mwu(.,bdi_sum, sex,distribution = "asymptotic")}

## 
## # Mann-Whitney-U-Test
## 
## Groups 1 = M (n = 732) | 2 = F (n = 1210):
##   U = 672494.000, W = 404216.000, p = 0.001, Z = -3.231
##   effect-size r =    0.073
##    rank-mean(1) =  918.71
##    rank-mean(2) = 1003.44

7.6 Anxiety (country, non-parametric)

kruskal.test(bai_sum ~ country, data = Dataset)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  bai_sum by country
## Kruskal-Wallis chi-squared = 5.7509, df = 2, p-value = 0.05639

Dataset %>% 
  rstatix::kruskal_effsize(bai_sum ~ country) #effect size

## # A tibble: 1 x 5
##   .y.         n effsize method  magnitude
## * <chr>   <int>   <dbl> <chr>   <ord>    
## 1 bai_sum  1957 0.00192 eta2[H] small

Dataset %>% 
  group_by(country) %>%
  filter(!is.na(bai_class)) %>% 
  count(bai_class) %>% 
  mutate(prop=prop.table(n)*100) %>% 
  select(-n) %>% 
  pivot_wider(names_from = c(bai_class),
              values_from = prop)

## # A tibble: 3 x 5
## # Groups:   country [3]
##   country  minima  leve moderada grave
##   <fct>     <dbl> <dbl>    <dbl> <dbl>
## 1 SPAIN      71.2  19.0     7.17  2.64
## 2 PORTUGAL   72.6  18.4     5.90  3.07
## 3 BRAZIL     67.3  21.6     8.25  2.86

Individual chi square for each country

Dataset %>% 
  group_by(country) %>% 
  summarise(pvalue = chisq.test(table(bai_class))$p.value)

## # A tibble: 3 x 2
##   country     pvalue
##   <fct>        <dbl>
## 1 SPAIN    2.04e-314
## 2 PORTUGAL 8.19e-116
## 3 BRAZIL   6.20e- 70

7.7 Anxiety (sex, non-parametric)

wilcox.test(bai_sum ~ sex, data = Dataset)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  bai_sum by sex
## W = 348512, p-value = 1.85e-15
## alternative hypothesis: true location shift is not equal to 0

Dataset %>% 
  filter(!is.na(sex)) %>% 
  {sjstats::mwu(.,bai_sum, sex,distribution = "asymptotic")}

## 
## # Mann-Whitney-U-Test
## 
## Groups 1 = M (n = 731) | 2 = F (n = 1214):
##   U = 616057.500, W = 348511.500, p < 0.001, Z = -7.951
##   effect-size r =    0.180
##    rank-mean(1) =  842.76
##    rank-mean(2) = 1051.42

8 Correlation

cor.test(Dataset$bdi_sum, Dataset$bai_sum, method = "spearman", use = "pairwise.complete.obs")

## 
##  Spearman's rank correlation rho
## 
## data:  Dataset$bdi_sum and Dataset$bai_sum
## S = 522131145, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5761969

9 Export to csv

# CSV
write.table(Dataset, file = "dataset_mapfre.csv", sep = ",", col.names = TRUE,row.names = FALSE, qmethod = "double")

write.table(backup, file = "dataset_original_mapfre.csv", sep = ",", col.names = TRUE,row.names = FALSE, qmethod = "double")

End of codes. If use, please cite.

Depression and anxiety symptoms in university students from Spain, Portugal and Brazil: Statistical analyses

Luis Anunciacao, PhD

Nov 4, 2020

1 Global

2 Packages

3 Data checking

3.1 Boxplot

3.2 Missing

3.3 Locating participants will all missing

3.4 Multiple imputation

4 Composite scores

4.1 Depression

4.1.1 Add total scores

4.1.2 Add classification

4.2 Anxiety

4.2.1 Add total scores

4.2.2 Add classification

5 Cronbach’s alpha

5.1 Depression

5.2 Anxiety

6 Table 1

7 Table 2

7.1 Depression

7.1.1 Total

7.1.2 By country

7.1.3 By sex

7.1.4 Regression analysis

7.2 Anxiety

7.2.1 Total

7.2.2 By country

7.2.3 By sex

7.2.4 Regression analysis

7.3 Depression (country, non-parametric)

7.4 Post hoc

7.5 Depression (sex, non-parametric)

7.6 Anxiety (country, non-parametric)

7.7 Anxiety (sex, non-parametric)

8 Correlation

9 Export to csv