suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(haven))
setwd("C:/Dropbox/Laboratorio/Pedro Brandao/Covid/media_trato_imputado")
#setwd("~/Dropbox/Laboratorio/Pedro Brandao/Covid/media_trato_imputado")
cog_tract <- read.csv("cog_tract.csv",
row.names=1, na.strings= c("NA", ""))
# Importa de CSV com NA e ""
covid <- read.csv("covid_controle.csv",
row.names=1, na.strings= c("NA", ""))
tratos <- read.csv("tratos_covid_controle.csv",
row.names=1, na.strings= c("NA", ""))
# Limpa formatação do SPSS
covid <- zap_formats(zap_labels(covid))
tratos <- zap_formats(zap_labels(tratos))
# Renomeia as variáveis
covid <- covid %>%
janitor::clean_names()
# Renomeia as variáveis
tratos <- tratos %>%
janitor::clean_names()
options(max.print=1000000)
#str(covid, list.len = ncol(covid))
#str(tratos, list.len = ncol(tratos))
# MoCA domain scores were calculated as means of single item scores and comprised subscores for
# a) orientation (spatial and temporal orientation),
# b) attention (digit span, letter A tapping and subtraction),
# c) executive function (trail making, abstraction and word fluency),
# d) visuoconstructive function (cube copying and clock drawing)
# e) language (naming, sentence repetition)
# f) memory (delayed word recall).
# Domain scores were not adjusted for years of education.
covid$moca_visuoconstrutivo = covid$moca02_cubo + covid$moca03td_rcontorno + covid$moca04td_rnum +
covid$moca05td_rpont
covid$moca_atencao = covid$moca11_vigilancia + covid$moca09digitspan_direto + covid$moca10digitspan_indireto +
covid$esc_subtracao7
covid$moca_executivo = covid$moca01_tmt + covid$moca20abstracao1 + covid$moca21abstracao2 + covid$moca19fluenciaverbfon
covid$moca_linguagem = covid$moca06nome_leao + covid$moca07nome_rino + covid$moca08nome_camelo +
covid$moca17repeticao1 + covid$moca18repeticao2
covid$moca_orientacao = covid$moca26evocacao5 + covid$moca27orientacao1 + covid$moca28orientacao2 +
covid$moca29orientacao3 + covid$moca30orientacao4 + covid$moca31orientacao5 + covid$moca32orientacao6
covid$moca_memoria = covid$moca22evocacao1 + covid$moca23evocacao2 + covid$moca24evocacao3 +
covid$moca25evocacao4 + covid$moca26evocacao5
### EXTRAS ###
# Nomeação
covid$moca_nomeacao = covid$moca06nome_leao + covid$moca07nome_rino + covid$moca08nome_camelo
# Subtração
covid$moca_subtracao = covid$moca12subtracao7_1 + covid$moca13subtracao7_2 + covid$moca14subtracao7_3 +
covid$moca15subtracao7_4 + covid$moca16subtracao7_5
# Recoding
covid$grupo_num <- if_else(covid$grupo == "COVID", 1, 0)
covid$grupo_num <- as.numeric(covid$grupo_num)
covid$idade_x <- as.numeric(covid$idade_x)
covid$gender_num <- if_else(covid$gender == "F", 1, 0)
covid$gender_num <- as.numeric(covid$gender_num)
#colnames(covid)
# Seleciona variáveis cognitivas
cogn <- covid %>% dplyr::select(otslc1sd:otspsfc, otspsfc_percentile:palfams,
palfams_percentile:paltea, paltea_percentile:vrmirtc,
q0023, cubo_erros_plano,
pdcrs_relogio_total:cube_total_error_score,
moca_visuoconstrutivo:moca_subtracao)
# Exclui dados ausentes
cog <- cogn[complete.cases(cogn),]
#str(cog, list.len = ncol(cog))
# Renomeia a fluencia_f
cog <- dplyr::rename(cog, fluencia_f = q0023)
library(caret)
## Carregando pacotes exigidos: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# NEAR ZERO VARIANCES
nzv <- nearZeroVar(cog, saveMetrics= TRUE)
nzv[nzv$nzv,][1:10,]
## freqRatio percentUnique zeroVar nzv
## palte2 29.33333 3.225806 FALSE TRUE
## paltea2 29.33333 3.225806 FALSE TRUE
## rtifesnr 45.00000 3.225806 FALSE TRUE
## rtisesnr 30.00000 2.150538 FALSE TRUE
## swmde4 45.50000 2.150538 FALSE TRUE
## swmpr 0.00000 1.075269 TRUE TRUE
## swmwe4 45.00000 3.225806 FALSE TRUE
## NA NA NA NA NA
## NA.1 NA NA NA NA
## NA.2 NA NA NA NA
# Filtra variáveis sem variância
nzv <- nearZeroVar(cog)
filteredDescr <- cog[, -nzv] #zeroVar
dim(filteredDescr)
## [1] 93 145
cogn <- filteredDescr
#colnames(cogn)
cogn$idade <- as.numeric(covid$idade_x)
cogn$escolaridade <- as.numeric(covid$escolaridade)
cogn$group <- as.factor(covid$grupo)
cogn$sex <- as.numeric(covid$sexo)
cogn$age_scaled <- (covid$idade_x - mean(covid$idade_x)) / sd(covid$idade_x)
cogn$educ_scaled <- (covid$escolaridade - mean(covid$escolaridade)) / sd(covid$escolaridade)
cogn$educ <- sqrt(covid$escolaridade)
###Cantab
#SWMBE
#OTSPSFC
#OTSMDLFC
#OTSMCC
#OTSMLC
#PALTEA
#PALFAMS
#PALMETS
#PRMPCI
#PRMPCD
#VRMIRTC
#VRMDRTC
#VRMFRDS
#RTISMDRT
#RTISMDMT
#RTIFMDRT
#RTIFMDMT
#MoCAtotal2
#pdcrs_relogio_total
#cube_total_error_score
#cubo_vertices
#cubo_erros_plano
#cube_vertices_errors
#cfq_11_total
#### Explorar, na estatÃstica bayesiana, as famÃlias das métricas cognitivas:
#SWMBE (Poisson)*
#OTSPSFC (Poisson)
#OTSMDLFC (Negative binomial)
#OTSMCC (Linear)
#OTSMLC (Linear)
#PALTEA (Poisson)*
#PALFAMS (Poisson)
#PALMETS (Poisson)
#PRMPCI (Linear)
#PRMPCD (Linear)
#VRMIRTC (Linear)
#VRMDRTC (Linear)
#VRMFRDS (Linear)
#RTISMDRT (Linear)
#RTISMDMT (Linear)
#RTIFMDRT (Linear)
#RTIFMDMT (Linear)
#MoCAtotal2 (Linear)
#pdcrs_relogio_total (Quasi-Poisson)*
## cube_total_error_score (Poisson)*
## cubo_vertices (Quasi-Poisson)*
## cubo_erros_plano (Poisson)*
## cube_vertices_errors (Quasi-Poisson)*
# SWMBE
hist(covid$swmbe, breaks = 30)

library(flexplot)
##
## Attaching package: 'flexplot'
## The following object is masked from 'package:ggplot2':
##
## flip_data
flexplot(swmbe ~ group + age_scaled + educ_scaled | sex, data = cogn)

flexplot(swmbe ~ age_scaled + sex | group, data = cogn, method = "lm", se=T)
## Warning: Using shapes for an ordinal variable is not advised

library(lmPerm)
library(multcomp)
## Carregando pacotes exigidos: mvtnorm
## Carregando pacotes exigidos: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Carregando pacotes exigidos: TH.data
## Carregando pacotes exigidos: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
##
## Attaching package: 'multcomp'
## The following object is masked from 'package:flexplot':
##
## cholesterol
set.seed(123)
fit <- aovp(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn,
perm = "Exact")
## [1] "Settings: unique SS : numeric variables centered"
anova(fit)
## Analysis of Variance Table
##
## Response: swmbe
## Df R Sum Sq R Mean Sq Iter Pr(Prob)
## group 1 205.1 205.09 1711 0.05552 .
## educ_scaled 1 15.3 15.35 51 0.90196
## age_scaled 1 1399.2 1399.16 5000 < 2e-16 ***
## sex 1 180.9 180.86 1844 0.05152 .
## Residuals 88 6761.5 76.84
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(123)
fit1 <- aovp(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn,
perm = "prob")
## [1] "Settings: unique SS : numeric variables centered"
anova(fit1)
## Analysis of Variance Table
##
## Response: swmbe
## Df R Sum Sq R Mean Sq F value Pr(>F)
## group 1 205.1 205.09 2.6692 0.1059
## educ_scaled 1 15.3 15.35 0.1998 0.6560
## age_scaled 1 1399.2 1399.16 18.2098 4.972e-05 ***
## sex 1 180.9 180.86 2.3539 0.1286
## Residuals 88 6761.5 76.84
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(123)
fit2 <- aovp(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn,
perm = "prob")
## [1] "Settings: unique SS : numeric variables centered"
anova(fit2)
## Analysis of Variance Table
##
## Response: swmbe
## Df R Sum Sq R Mean Sq F value Pr(>F)
## group 1 361.4 361.40 4.8542 0.03022 *
## educ_scaled 1 5.6 5.61 0.0753 0.78443
## age_scaled 1 1606.3 1606.26 21.5745 1.199e-05 ***
## sex 1 229.5 229.54 3.0830 0.08263 .
## age_scaled:sex 1 284.2 284.19 3.8170 0.05395 .
## Residuals 87 6477.3 74.45
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(MASS)
library(performance)
##
## Attaching package: 'performance'
## The following object is masked from 'package:flexplot':
##
## icc
fit3 <- glm(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn,
family = "poisson")
summary(fit3)
##
## Call:
## glm(formula = swmbe ~ group + educ_scaled + age_scaled + sex,
## family = "poisson", data = cogn)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.1443 -2.6082 0.0855 1.6077 4.6557
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.08112 0.10885 19.119 < 2e-16 ***
## groupCOVID 0.23283 0.06140 3.792 0.000149 ***
## educ_scaled -0.02464 0.02987 -0.825 0.409521
## age_scaled 0.29459 0.02916 10.104 < 2e-16 ***
## sex 0.18934 0.06075 3.117 0.001828 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 785.32 on 92 degrees of freedom
## Residual deviance: 663.09 on 88 degrees of freedom
## AIC: 1025.9
##
## Number of Fisher Scoring iterations: 5
# checking model assumptions
check_model(fit3)

fit4 <- glm(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn,
family = "poisson")
summary(fit4)
##
## Call:
## glm(formula = swmbe ~ group + educ_scaled + age_scaled + sex +
## age_scaled:sex, family = "poisson", data = cogn)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.8115 -2.2807 0.0121 1.7472 5.2029
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.80243 0.12709 14.182 < 2e-16 ***
## groupCOVID 0.32033 0.06396 5.008 5.50e-07 ***
## educ_scaled -0.01034 0.03002 -0.344 0.731
## age_scaled 0.85570 0.11528 7.423 1.15e-13 ***
## sex 0.30616 0.06747 4.538 5.69e-06 ***
## age_scaled:sex -0.32321 0.06443 -5.016 5.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 785.32 on 92 degrees of freedom
## Residual deviance: 638.00 on 87 degrees of freedom
## AIC: 1002.8
##
## Number of Fisher Scoring iterations: 5
check_model(fit4)

anova(fit3, fit4)
## Analysis of Deviance Table
##
## Model 1: swmbe ~ group + educ_scaled + age_scaled + sex
## Model 2: swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex
## Resid. Df Resid. Dev Df Deviance
## 1 88 663.09
## 2 87 638.00 1 25.096
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
fit5 <- zeroinfl(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn)
summary(fit5)
##
## Call:
## zeroinfl(formula = swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -3.98913 -1.26357 0.08126 1.25020 3.28165
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.32565 0.10948 21.242 < 2e-16 ***
## groupCOVID 0.21487 0.06116 3.513 0.000443 ***
## educ_scaled -0.02513 0.03037 -0.828 0.407947
## age_scaled 0.21644 0.02932 7.382 1.56e-13 ***
## sex 0.11736 0.06077 1.931 0.053441 .
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.56146 1.29461 -0.434 0.6645
## groupCOVID -0.30131 0.73835 -0.408 0.6832
## educ_scaled -0.04217 0.36408 -0.116 0.9078
## age_scaled -1.03395 0.50050 -2.066 0.0388 *
## sex -1.07664 0.76895 -1.400 0.1615
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -423.5 on 10 Df
check_zeroinflation(fit5)
## # Check for zero-inflation
##
## Observed zeros: 10
## Predicted zeros: 0
## Ratio: 0.00
## Model is underfitting zeros (probable zero-inflation).
#check_model(fit5)
fit6 <- zeroinfl(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn)
summary(fit6)
##
## Call:
## zeroinfl(formula = swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex,
## data = cogn)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -3.81175 -1.26244 -0.01453 1.26324 3.81214
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.08440 0.12792 16.294 < 2e-16 ***
## groupCOVID 0.28846 0.06378 4.523 6.10e-06 ***
## educ_scaled -0.01323 0.03063 -0.432 0.66591
## age_scaled 0.68856 0.11610 5.931 3.01e-09 ***
## sex 0.21875 0.06750 3.241 0.00119 **
## age_scaled:sex -0.27139 0.06471 -4.194 2.74e-05 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.60319 1.35426 -0.445 0.656
## groupCOVID -0.33434 0.80345 -0.416 0.677
## educ_scaled -0.04465 0.36554 -0.122 0.903
## age_scaled -1.20847 1.74927 -0.691 0.490
## sex -1.02768 0.89133 -1.153 0.249
## age_scaled:sex 0.11165 1.06609 0.105 0.917
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -414.7 on 12 Df
fit7 <- zeroinfl(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn,
dist = "negbin")
summary(fit7)
##
## Call:
## zeroinfl(formula = swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex,
## data = cogn, dist = "negbin")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.44018 -0.73588 -0.02194 0.64943 3.00350
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.03913 0.30054 6.785 1.16e-11 ***
## groupCOVID 0.28384 0.16049 1.769 0.0770 .
## educ_scaled -0.02604 0.07864 -0.331 0.7406
## age_scaled 0.76315 0.31498 2.423 0.0154 *
## sex 0.23944 0.16119 1.485 0.1374
## age_scaled:sex -0.30777 0.17530 -1.756 0.0791 .
## Log(theta) 0.99207 0.21069 4.709 2.49e-06 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.73161 1.48660 -0.492 0.623
## groupCOVID -0.31626 0.86601 -0.365 0.715
## educ_scaled -0.06168 0.40632 -0.152 0.879
## age_scaled -1.23180 1.85539 -0.664 0.507
## sex -1.00722 0.95281 -1.057 0.290
## age_scaled:sex 0.12272 1.12050 0.110 0.913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 2.6968
## Number of iterations in BFGS optimization: 19
## Log-likelihood: -320 on 13 Df
# Compare models
compare_performance(fit5, fit6, fit7, rank = TRUE)
## # Comparison of Model Performance Indices
##
## Name | Model | AIC | BIC | R2 | R2 (adj.) | RMSE | Sigma | Score_log | Score_spherical | Performance-Score
## -------------------------------------------------------------------------------------------------------------------------
## fit7 | zeroinfl | 666.029 | 698.953 | 0.965 | 0.963 | 8.394 | 9.050 | -3.441 | 0.069 | 85.51%
## fit6 | zeroinfl | 853.441 | 883.833 | 0.887 | 0.879 | 8.372 | 8.971 | -4.459 | 0.067 | 32.24%
## fit5 | zeroinfl | 867.011 | 892.337 | 0.867 | 0.860 | 8.509 | 9.007 | -4.554 | 0.067 | 7.21%
test_performance(fit5, fit6, fit7)
## Some of the nested models seem to be identical.
## Name | Model | BF
## ------------------------
## fit5 | zeroinfl |
## fit6 | zeroinfl | 70.24
## fit7 | zeroinfl | > 1000
## Models were detected as nested and are compared in sequential order.
plot(compare_performance(fit5, fit6, fit7, rank = TRUE))
## Registered S3 methods overwritten by 'parameters':
## method from
## as.double.parameters_kurtosis datawizard
## as.double.parameters_skewness datawizard
## as.double.parameters_smoothness datawizard
## as.numeric.parameters_kurtosis datawizard
## as.numeric.parameters_skewness datawizard
## as.numeric.parameters_smoothness datawizard
## print.parameters_distribution datawizard
## print.parameters_kurtosis datawizard
## print.parameters_skewness datawizard
## summary.parameters_kurtosis datawizard
## summary.parameters_skewness datawizard
