suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(haven))

setwd("C:/Dropbox/Laboratorio/Pedro Brandao/Covid/media_trato_imputado")

#setwd("~/Dropbox/Laboratorio/Pedro Brandao/Covid/media_trato_imputado")

cog_tract <- read.csv("cog_tract.csv", 
                  row.names=1, na.strings= c("NA", ""))
# Importa de CSV com NA e "" 
covid <- read.csv("covid_controle.csv", 
                  row.names=1, na.strings= c("NA", ""))

tratos <- read.csv("tratos_covid_controle.csv", 
                  row.names=1, na.strings= c("NA", ""))

# Limpa formatação do SPSS
covid <- zap_formats(zap_labels(covid))

tratos <- zap_formats(zap_labels(tratos))

# Renomeia as variáveis
covid <- covid %>%
  janitor::clean_names()

# Renomeia as variáveis
tratos <- tratos %>%
  janitor::clean_names()

options(max.print=1000000)

#str(covid, list.len = ncol(covid))
#str(tratos, list.len = ncol(tratos))


# MoCA domain scores were calculated as means of single item scores and comprised subscores for
# a) orientation (spatial and temporal orientation),
# b) attention (digit span, letter A tapping and subtraction),
# c) executive function (trail making, abstraction and word fluency),
# d) visuoconstructive function (cube copying and clock drawing)
# e) language (naming, sentence repetition)
# f) memory (delayed word recall).
# Domain scores were not adjusted for years of education.

covid$moca_visuoconstrutivo = covid$moca02_cubo + covid$moca03td_rcontorno + covid$moca04td_rnum +
  covid$moca05td_rpont

covid$moca_atencao = covid$moca11_vigilancia + covid$moca09digitspan_direto + covid$moca10digitspan_indireto +
  covid$esc_subtracao7

covid$moca_executivo = covid$moca01_tmt + covid$moca20abstracao1 + covid$moca21abstracao2 + covid$moca19fluenciaverbfon

covid$moca_linguagem = covid$moca06nome_leao + covid$moca07nome_rino + covid$moca08nome_camelo +
  covid$moca17repeticao1 + covid$moca18repeticao2

covid$moca_orientacao = covid$moca26evocacao5 + covid$moca27orientacao1 + covid$moca28orientacao2 +
  covid$moca29orientacao3 + covid$moca30orientacao4 + covid$moca31orientacao5 + covid$moca32orientacao6

covid$moca_memoria = covid$moca22evocacao1 + covid$moca23evocacao2 + covid$moca24evocacao3 +
  covid$moca25evocacao4 + covid$moca26evocacao5 

### EXTRAS ###
# Nomeação
covid$moca_nomeacao = covid$moca06nome_leao + covid$moca07nome_rino + covid$moca08nome_camelo

# Subtração
covid$moca_subtracao = covid$moca12subtracao7_1 + covid$moca13subtracao7_2 + covid$moca14subtracao7_3 + 
  covid$moca15subtracao7_4 + covid$moca16subtracao7_5

# Recoding
covid$grupo_num <- if_else(covid$grupo == "COVID", 1, 0)
covid$grupo_num <- as.numeric(covid$grupo_num)
covid$idade_x <- as.numeric(covid$idade_x)
covid$gender_num <- if_else(covid$gender == "F", 1, 0)
covid$gender_num <- as.numeric(covid$gender_num)

#colnames(covid)

# Seleciona variáveis cognitivas
cogn <- covid %>% dplyr::select(otslc1sd:otspsfc, otspsfc_percentile:palfams,
                                palfams_percentile:paltea, paltea_percentile:vrmirtc,
                                q0023, cubo_erros_plano,
                                pdcrs_relogio_total:cube_total_error_score,
                                moca_visuoconstrutivo:moca_subtracao)

# Exclui dados ausentes
cog <- cogn[complete.cases(cogn),]
#str(cog, list.len = ncol(cog))

# Renomeia a fluencia_f
cog <- dplyr::rename(cog, fluencia_f = q0023)

library(caret)
## Carregando pacotes exigidos: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
# NEAR ZERO VARIANCES
nzv <- nearZeroVar(cog, saveMetrics= TRUE)
nzv[nzv$nzv,][1:10,]
##          freqRatio percentUnique zeroVar  nzv
## palte2    29.33333      3.225806   FALSE TRUE
## paltea2   29.33333      3.225806   FALSE TRUE
## rtifesnr  45.00000      3.225806   FALSE TRUE
## rtisesnr  30.00000      2.150538   FALSE TRUE
## swmde4    45.50000      2.150538   FALSE TRUE
## swmpr      0.00000      1.075269    TRUE TRUE
## swmwe4    45.00000      3.225806   FALSE TRUE
## NA              NA            NA      NA   NA
## NA.1            NA            NA      NA   NA
## NA.2            NA            NA      NA   NA
# Filtra variáveis sem variância
nzv <- nearZeroVar(cog)
filteredDescr <- cog[, -nzv] #zeroVar
dim(filteredDescr)
## [1]  93 145
cogn <- filteredDescr
#colnames(cogn)

cogn$idade <- as.numeric(covid$idade_x)
cogn$escolaridade <- as.numeric(covid$escolaridade)

cogn$group <- as.factor(covid$grupo)
cogn$sex <- as.numeric(covid$sexo)
cogn$age_scaled <- (covid$idade_x - mean(covid$idade_x)) / sd(covid$idade_x)
cogn$educ_scaled <- (covid$escolaridade - mean(covid$escolaridade)) / sd(covid$escolaridade)
cogn$educ <- sqrt(covid$escolaridade)
###Cantab

#SWMBE
#OTSPSFC
#OTSMDLFC
#OTSMCC
#OTSMLC
#PALTEA
#PALFAMS
#PALMETS
#PRMPCI
#PRMPCD
#VRMIRTC
#VRMDRTC
#VRMFRDS
#RTISMDRT
#RTISMDMT
#RTIFMDRT
#RTIFMDMT

#MoCAtotal2
#pdcrs_relogio_total
#cube_total_error_score
#cubo_vertices
#cubo_erros_plano
#cube_vertices_errors
#cfq_11_total

#### Explorar, na estatística bayesiana, as famílias das métricas cognitivas:

#SWMBE (Poisson)*
#OTSPSFC (Poisson)
#OTSMDLFC (Negative binomial)
#OTSMCC (Linear)
#OTSMLC (Linear)
#PALTEA (Poisson)*
#PALFAMS (Poisson)
#PALMETS (Poisson)
#PRMPCI (Linear)
#PRMPCD (Linear)
#VRMIRTC (Linear)
#VRMDRTC (Linear)
#VRMFRDS (Linear)
#RTISMDRT (Linear)
#RTISMDMT (Linear)
#RTIFMDRT (Linear)
#RTIFMDMT (Linear)
#MoCAtotal2 (Linear)
#pdcrs_relogio_total (Quasi-Poisson)*
##  cube_total_error_score (Poisson)*
##  cubo_vertices (Quasi-Poisson)*
##  cubo_erros_plano (Poisson)*
##  cube_vertices_errors (Quasi-Poisson)*
# SWMBE
hist(covid$swmbe, breaks = 30)

library(flexplot)
## 
## Attaching package: 'flexplot'
## The following object is masked from 'package:ggplot2':
## 
##     flip_data
flexplot(swmbe ~ group + age_scaled + educ_scaled | sex, data = cogn)

flexplot(swmbe ~ age_scaled + sex | group, data = cogn, method = "lm", se=T)
## Warning: Using shapes for an ordinal variable is not advised

library(lmPerm)
library(multcomp)
## Carregando pacotes exigidos: mvtnorm
## Carregando pacotes exigidos: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
## Carregando pacotes exigidos: TH.data
## Carregando pacotes exigidos: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
## 
## Attaching package: 'multcomp'
## The following object is masked from 'package:flexplot':
## 
##     cholesterol
set.seed(123)
fit <- aovp(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn,
           perm = "Exact")
## [1] "Settings:  unique SS : numeric variables centered"
anova(fit)
## Analysis of Variance Table
## 
## Response: swmbe
##             Df R Sum Sq R Mean Sq Iter Pr(Prob)    
## group        1    205.1    205.09 1711  0.05552 .  
## educ_scaled  1     15.3     15.35   51  0.90196    
## age_scaled   1   1399.2   1399.16 5000  < 2e-16 ***
## sex          1    180.9    180.86 1844  0.05152 .  
## Residuals   88   6761.5     76.84                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(123)
fit1 <- aovp(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn,
           perm = "prob")
## [1] "Settings:  unique SS : numeric variables centered"
anova(fit1)
## Analysis of Variance Table
## 
## Response: swmbe 
##             Df R Sum Sq R Mean Sq F value    Pr(>F)    
## group        1    205.1    205.09  2.6692    0.1059    
## educ_scaled  1     15.3     15.35  0.1998    0.6560    
## age_scaled   1   1399.2   1399.16 18.2098 4.972e-05 ***
## sex          1    180.9    180.86  2.3539    0.1286    
## Residuals   88   6761.5     76.84                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(123)
fit2 <- aovp(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn,
           perm = "prob")
## [1] "Settings:  unique SS : numeric variables centered"
anova(fit2)
## Analysis of Variance Table
## 
## Response: swmbe 
##                Df R Sum Sq R Mean Sq F value    Pr(>F)    
## group           1    361.4    361.40  4.8542   0.03022 *  
## educ_scaled     1      5.6      5.61  0.0753   0.78443    
## age_scaled      1   1606.3   1606.26 21.5745 1.199e-05 ***
## sex             1    229.5    229.54  3.0830   0.08263 .  
## age_scaled:sex  1    284.2    284.19  3.8170   0.05395 .  
## Residuals      87   6477.3     74.45                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(MASS)
library(performance)
## 
## Attaching package: 'performance'
## The following object is masked from 'package:flexplot':
## 
##     icc
fit3 <- glm(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn,
           family = "poisson")

summary(fit3)
## 
## Call:
## glm(formula = swmbe ~ group + educ_scaled + age_scaled + sex, 
##     family = "poisson", data = cogn)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.1443  -2.6082   0.0855   1.6077   4.6557  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.08112    0.10885  19.119  < 2e-16 ***
## groupCOVID   0.23283    0.06140   3.792 0.000149 ***
## educ_scaled -0.02464    0.02987  -0.825 0.409521    
## age_scaled   0.29459    0.02916  10.104  < 2e-16 ***
## sex          0.18934    0.06075   3.117 0.001828 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 785.32  on 92  degrees of freedom
## Residual deviance: 663.09  on 88  degrees of freedom
## AIC: 1025.9
## 
## Number of Fisher Scoring iterations: 5
# checking model assumptions
check_model(fit3)

fit4 <- glm(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn,
           family = "poisson")

summary(fit4)
## 
## Call:
## glm(formula = swmbe ~ group + educ_scaled + age_scaled + sex + 
##     age_scaled:sex, family = "poisson", data = cogn)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.8115  -2.2807   0.0121   1.7472   5.2029  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     1.80243    0.12709  14.182  < 2e-16 ***
## groupCOVID      0.32033    0.06396   5.008 5.50e-07 ***
## educ_scaled    -0.01034    0.03002  -0.344    0.731    
## age_scaled      0.85570    0.11528   7.423 1.15e-13 ***
## sex             0.30616    0.06747   4.538 5.69e-06 ***
## age_scaled:sex -0.32321    0.06443  -5.016 5.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 785.32  on 92  degrees of freedom
## Residual deviance: 638.00  on 87  degrees of freedom
## AIC: 1002.8
## 
## Number of Fisher Scoring iterations: 5
check_model(fit4)

anova(fit3, fit4)
## Analysis of Deviance Table
## 
## Model 1: swmbe ~ group + educ_scaled + age_scaled + sex
## Model 2: swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex
##   Resid. Df Resid. Dev Df Deviance
## 1        88     663.09            
## 2        87     638.00  1   25.096
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
fit5 <- zeroinfl(swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn)
summary(fit5)
## 
## Call:
## zeroinfl(formula = swmbe ~ group + educ_scaled + age_scaled + sex, data = cogn)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -3.98913 -1.26357  0.08126  1.25020  3.28165 
## 
## Count model coefficients (poisson with log link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.32565    0.10948  21.242  < 2e-16 ***
## groupCOVID   0.21487    0.06116   3.513 0.000443 ***
## educ_scaled -0.02513    0.03037  -0.828 0.407947    
## age_scaled   0.21644    0.02932   7.382 1.56e-13 ***
## sex          0.11736    0.06077   1.931 0.053441 .  
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.56146    1.29461  -0.434   0.6645  
## groupCOVID  -0.30131    0.73835  -0.408   0.6832  
## educ_scaled -0.04217    0.36408  -0.116   0.9078  
## age_scaled  -1.03395    0.50050  -2.066   0.0388 *
## sex         -1.07664    0.76895  -1.400   0.1615  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -423.5 on 10 Df
check_zeroinflation(fit5)
## # Check for zero-inflation
## 
##    Observed zeros: 10
##   Predicted zeros: 0
##             Ratio: 0.00
## Model is underfitting zeros (probable zero-inflation).
#check_model(fit5)

fit6 <- zeroinfl(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn)
summary(fit6)
## 
## Call:
## zeroinfl(formula = swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, 
##     data = cogn)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -3.81175 -1.26244 -0.01453  1.26324  3.81214 
## 
## Count model coefficients (poisson with log link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.08440    0.12792  16.294  < 2e-16 ***
## groupCOVID      0.28846    0.06378   4.523 6.10e-06 ***
## educ_scaled    -0.01323    0.03063  -0.432  0.66591    
## age_scaled      0.68856    0.11610   5.931 3.01e-09 ***
## sex             0.21875    0.06750   3.241  0.00119 ** 
## age_scaled:sex -0.27139    0.06471  -4.194 2.74e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -0.60319    1.35426  -0.445    0.656
## groupCOVID     -0.33434    0.80345  -0.416    0.677
## educ_scaled    -0.04465    0.36554  -0.122    0.903
## age_scaled     -1.20847    1.74927  -0.691    0.490
## sex            -1.02768    0.89133  -1.153    0.249
## age_scaled:sex  0.11165    1.06609   0.105    0.917
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -414.7 on 12 Df
fit7 <- zeroinfl(swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, data = cogn,
           dist = "negbin")
summary(fit7)
## 
## Call:
## zeroinfl(formula = swmbe ~ group + educ_scaled + age_scaled + sex + age_scaled:sex, 
##     data = cogn, dist = "negbin")
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -1.44018 -0.73588 -0.02194  0.64943  3.00350 
## 
## Count model coefficients (negbin with log link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.03913    0.30054   6.785 1.16e-11 ***
## groupCOVID      0.28384    0.16049   1.769   0.0770 .  
## educ_scaled    -0.02604    0.07864  -0.331   0.7406    
## age_scaled      0.76315    0.31498   2.423   0.0154 *  
## sex             0.23944    0.16119   1.485   0.1374    
## age_scaled:sex -0.30777    0.17530  -1.756   0.0791 .  
## Log(theta)      0.99207    0.21069   4.709 2.49e-06 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -0.73161    1.48660  -0.492    0.623
## groupCOVID     -0.31626    0.86601  -0.365    0.715
## educ_scaled    -0.06168    0.40632  -0.152    0.879
## age_scaled     -1.23180    1.85539  -0.664    0.507
## sex            -1.00722    0.95281  -1.057    0.290
## age_scaled:sex  0.12272    1.12050   0.110    0.913
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 2.6968 
## Number of iterations in BFGS optimization: 19 
## Log-likelihood:  -320 on 13 Df
# Compare models
compare_performance(fit5, fit6, fit7, rank = TRUE)
## # Comparison of Model Performance Indices
## 
## Name |    Model |     AIC |     BIC |    R2 | R2 (adj.) |  RMSE | Sigma | Score_log | Score_spherical | Performance-Score
## -------------------------------------------------------------------------------------------------------------------------
## fit7 | zeroinfl | 666.029 | 698.953 | 0.965 |     0.963 | 8.394 | 9.050 |    -3.441 |           0.069 |            85.51%
## fit6 | zeroinfl | 853.441 | 883.833 | 0.887 |     0.879 | 8.372 | 8.971 |    -4.459 |           0.067 |            32.24%
## fit5 | zeroinfl | 867.011 | 892.337 | 0.867 |     0.860 | 8.509 | 9.007 |    -4.554 |           0.067 |             7.21%
test_performance(fit5, fit6, fit7)
## Some of the nested models seem to be identical.
## Name |    Model |     BF
## ------------------------
## fit5 | zeroinfl |       
## fit6 | zeroinfl |  70.24
## fit7 | zeroinfl | > 1000
## Models were detected as nested and are compared in sequential order.
plot(compare_performance(fit5, fit6, fit7, rank = TRUE))
## Registered S3 methods overwritten by 'parameters':
##   method                           from      
##   as.double.parameters_kurtosis    datawizard
##   as.double.parameters_skewness    datawizard
##   as.double.parameters_smoothness  datawizard
##   as.numeric.parameters_kurtosis   datawizard
##   as.numeric.parameters_skewness   datawizard
##   as.numeric.parameters_smoothness datawizard
##   print.parameters_distribution    datawizard
##   print.parameters_kurtosis        datawizard
##   print.parameters_skewness        datawizard
##   summary.parameters_kurtosis      datawizard
##   summary.parameters_skewness      datawizard