All files used here are availible in a public repository licensed under MIT Licences and accessible by the following url:
https://github.com/crepeia/MBRP
#Setting Directory
setwd("~/MBRP_R")
#Importing SPSS file .sav
base.dat <- read.spss("Base.sav", to.data.frame = T, use.missings = T)
#Recode Missing Data 888
for (i in c(1:350)) {
base.dat[,c(i)]<-sub("888", "NA", base.dat[,c(i)], ignore.case = FALSE, perl = FALSE, fixed = F, useBytes = FALSE)
}
#Recode Missing Data 777
for (i in c(1:350)) {
base.dat[,c(i)]<-sub("777", "NA", base.dat[,c(i)], ignore.case = FALSE, perl = FALSE, fixed = F, useBytes = FALSE)
}
#Creating a subset for analysis without cases excluded in our baseline
MBRP <- base.dat[grep("CORRI", base.dat$ETAPA), ]
MBRP_baseline <- MBRP[ ,c(11,12,19,24,252,265,295,296,299,300)]
#Dataframe
MBRP_base <- MBRP_baseline[,c(5:10)] #only psychometric variables
MBRP_base2 <- MBRP_baseline[,c(5:10,2,1,3,4)] # all variables
#As dataframe
MBRP_base<-as.data.frame(MBRP_base)
MBRP_base2<-as.data.frame(MBRP_base2)
#As factor
MBRP_base2[,c(8)]<-as.factor(MBRP_base2[,c(8)])
MBRP_base2[,c(9)]<-as.factor(MBRP_base2[,c(9)])
MBRP_base2[,c(10)]<-as.factor(MBRP_base2[,c(10)])
#As numeric
for (i in c(1:6)) {
MBRP_base[,c(i)]<-as.numeric(MBRP_base[,c(i)])
}
for (i in c(1:6)) {
MBRP_base2[,c(i)]<-as.numeric(MBRP_base2[,c(i)])
}
#Cleaning missing data for regression analysis
MBRP_regression <- MBRP_base[,c(1:6)]
MBRP_regression <- na.omit(MBRP_regression)
MBRP_regression2 <- MBRP_base2[,c(1:10)]
MBRP_regression2 <- na.omit(MBRP_regression2)
MBRP_regression3<-MBRP_regression2[,1:6]
#Status Social Economic - Variables
##Gender
describe(MBRP_regression2$X.1Gênero)
## MBRP_regression2$X.1Gênero
## n missing unique
## 90 0 2
##
## Feminino (71, 79%), Masculino (19, 21%)
summary(MBRP_regression2$X.1Gênero)
## Feminino Masculino
## 71 19
##Educational Status
describe(MBRP_regression2$X.7.1RECODEEscolaridade)
## MBRP_regression2$X.7.1RECODEEscolaridade
## n missing unique
## 90 0 5
##
## 0 ANOS DE ESTUDO ATÉ 4 ANOS (19, 21%)
## 5 ANOS DE ESTUDO ATÉ 8 ANOS (13, 14%)
## 9 ANOS DE ESTUDO ATÉ 11 ANOS (39, 43%)
## NA (1, 1%)
## SUPERIOR INCOMPLETO OU COMPLETO (18, 20%)
summary(MBRP_regression2$X.7.1RECODEEscolaridade)
## 0 ANOS DE ESTUDO ATÉ 4 ANOS 5 ANOS DE ESTUDO ATÉ 8 ANOS
## 19 13
## 9 ANOS DE ESTUDO ATÉ 11 ANOS NA
## 39 1
## SUPERIOR INCOMPLETO OU COMPLETO
## 18
##Meditation
describe(MBRP_regression2$X.11.1Vocêpraticameditação)
## MBRP_regression2$X.11.1Vocêpraticameditação
## n missing unique
## 90 0 3
##
## NA (1, 1%), Não (85, 94%), Sim (4, 4%)
summary(MBRP_regression2$X.11.1Vocêpraticameditação)
## NA Não Sim
## 1 85 4
#Age
MBRP_regression2$X.2Idade<-as.numeric(MBRP_regression2$X.2Idade)
summary(MBRP_regression2$X.2Idade)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 42.00 49.50 48.73 56.75 71.00
sd(MBRP_regression2$X.2Idade)
## [1] 11.37887
#FFMQ
summary(MBRP_regression3$FFMQTOTAL)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 90.0 113.2 122.0 121.5 129.0 151.0
sd(MBRP_regression3$FFMQTOTAL)
## [1] 13.37048
#FARGESTRON
summary(MBRP_regression3$FAGERTRONTOTAL)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 5.000 7.000 6.367 8.000 10.000
sd(MBRP_regression3$FAGERTRONTOTAL)
## [1] 2.200357
#HAD Ansiedade
summary(MBRP_regression3$HADansiedade)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 9.00 11.50 11.44 14.75 20.00
sd(MBRP_regression3$HADansiedade)
## [1] 3.871855
#HAD Depressão
summary(MBRP_regression3$HADdepressao)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 5.000 9.000 8.589 11.000 21.000
sd(MBRP_regression3$HADdepressao)
## [1] 4.191737
#Afeto Positivo
summary(MBRP_regression3$AfetoPositivo)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.00 19.00 25.50 25.28 30.75 41.00
sd(MBRP_regression3$AfetoPositivo)
## [1] 6.92987
#Afeto Negativo
summary(MBRP_regression3$AfetoNegativo)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.00 16.00 20.00 20.86 25.00 33.00
sd(MBRP_regression3$AfetoNegativo)
## [1] 6.366275
#Define FFMQ as reference variable
x<-MBRP_regression3[1]
print(corr.test(x,MBRP_regression3[2:5]), short=F)
## Call:corr.test(x = x, y = MBRP_regression3[2:5])
## Correlation matrix
## FAGERTRONTOTAL HADansiedade HADdepressao AfetoPositivo
## FFMQTOTAL -0.31 -0.33 -0.46 0.49
## Sample Size
## [1] 90
## Probability values adjusted for multiple tests.
## FAGERTRONTOTAL HADansiedade HADdepressao AfetoPositivo
## FFMQTOTAL 0 0 0 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
##
## Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci
## lower r upper p
## FFMQT-FAGER -0.48 -0.31 -0.11 0
## FFMQT-HADns -0.50 -0.33 -0.13 0
## FFMQT-HADdp -0.61 -0.46 -0.28 0
## FFMQT-AftPs 0.32 0.49 0.64 0
#Correlation Matrix
print(plot(MBRP_regression3, gap=0))
## NULL
print(corr.test(MBRP_regression3), short=F)
## Call:corr.test(x = MBRP_regression3)
## Correlation matrix
## FFMQTOTAL FAGERTRONTOTAL HADansiedade HADdepressao
## FFMQTOTAL 1.00 -0.31 -0.33 -0.46
## FAGERTRONTOTAL -0.31 1.00 0.34 0.19
## HADansiedade -0.33 0.34 1.00 0.62
## HADdepressao -0.46 0.19 0.62 1.00
## AfetoPositivo 0.49 -0.06 -0.28 -0.63
## AfetoNegativo -0.37 0.19 0.59 0.54
## AfetoPositivo AfetoNegativo
## FFMQTOTAL 0.49 -0.37
## FAGERTRONTOTAL -0.06 0.19
## HADansiedade -0.28 0.59
## HADdepressao -0.63 0.54
## AfetoPositivo 1.00 -0.27
## AfetoNegativo -0.27 1.00
## Sample Size
## [1] 90
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## FFMQTOTAL FAGERTRONTOTAL HADansiedade HADdepressao
## FFMQTOTAL 0 0.02 0.01 0.00
## FAGERTRONTOTAL 0 0.00 0.01 0.22
## HADansiedade 0 0.00 0.00 0.00
## HADdepressao 0 0.08 0.00 0.00
## AfetoPositivo 0 0.58 0.01 0.00
## AfetoNegativo 0 0.07 0.00 0.00
## AfetoPositivo AfetoNegativo
## FFMQTOTAL 0.00 0.00
## FAGERTRONTOTAL 0.58 0.22
## HADansiedade 0.04 0.00
## HADdepressao 0.00 0.00
## AfetoPositivo 0.00 0.04
## AfetoNegativo 0.01 0.00
##
## To see confidence intervals of the correlations, print with the short=FALSE option
##
## Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci
## lower r upper p
## FFMQT-FAGER -0.48 -0.31 -0.11 0.00
## FFMQT-HADns -0.50 -0.33 -0.13 0.00
## FFMQT-HADdp -0.61 -0.46 -0.28 0.00
## FFMQT-AftPs 0.32 0.49 0.64 0.00
## FFMQT-AftNg -0.54 -0.37 -0.18 0.00
## FAGER-HADns 0.15 0.34 0.51 0.00
## FAGER-HADdp -0.02 0.19 0.38 0.08
## FAGER-AftPs -0.26 -0.06 0.15 0.58
## FAGER-AftNg -0.02 0.19 0.38 0.07
## HADns-HADdp 0.48 0.62 0.74 0.00
## HADns-AftPs -0.46 -0.28 -0.08 0.01
## HADns-AftNg 0.44 0.59 0.71 0.00
## HADdp-AftPs -0.74 -0.63 -0.49 0.00
## HADdp-AftNg 0.38 0.54 0.67 0.00
## AftPs-AftNg -0.45 -0.27 -0.07 0.01
#Fit Model
fit1 <- lm(FFMQTOTAL ~ FAGERTRONTOTAL + HADansiedade + HADdepressao + AfetoNegativo + AfetoPositivo, data=MBRP_regression3)
#Model Summary
summary(fit1) # show results
##
## Call:
## lm(formula = FFMQTOTAL ~ FAGERTRONTOTAL + HADansiedade + HADdepressao +
## AfetoNegativo + AfetoPositivo, data = MBRP_regression3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.3185 -7.5528 -0.0555 8.6064 20.1614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.3348 8.4400 14.495 < 2e-16 ***
## FAGERTRONTOTAL -1.4854 0.5629 -2.639 0.00992 **
## HADansiedade 0.1495 0.4372 0.342 0.73322
## HADdepressao -0.3488 0.4594 -0.759 0.44977
## AfetoNegativo -0.3968 0.2350 -1.688 0.09503 .
## AfetoPositivo 0.7193 0.2197 3.274 0.00154 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.96 on 84 degrees of freedom
## Multiple R-squared: 0.3661, Adjusted R-squared: 0.3284
## F-statistic: 9.703 on 5 and 84 DF, p-value: 2.434e-07
#Anova
anova(fit1) # anova table
## Analysis of Variance Table
##
## Response: FFMQTOTAL
## Df Sum Sq Mean Sq F value Pr(>F)
## FAGERTRONTOTAL 1 1505.8 1505.76 12.5411 0.0006524 ***
## HADansiedade 1 871.9 871.90 7.2618 0.0085053 **
## HADdepressao 1 1871.6 1871.65 15.5885 0.0001630 ***
## AfetoNegativo 1 288.3 288.34 2.4015 0.1249792
## AfetoPositivo 1 1287.3 1287.33 10.7219 0.0015389 **
## Residuals 84 10085.5 120.07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Standartized Coeficients
fit.beta1 <- lm.beta(fit1)
#Results
fit.beta1
## FAGERTRONTOTAL HADansiedade HADdepressao AfetoNegativo AfetoPositivo
## -0.24445255 0.04329522 -0.10935928 -0.18892576 0.37281267
confint(fit1, 'FAGERTRONTOTAL', level=0.95)
## 2.5 % 97.5 %
## FAGERTRONTOTAL -2.604887 -0.3659464
confint(fit1, 'HADansiedade', level=0.95)
## 2.5 % 97.5 %
## HADansiedade -0.7198781 1.018896
confint(fit1, 'HADdepressao', level=0.95)
## 2.5 % 97.5 %
## HADdepressao -1.262353 0.5647015
confint(fit1, 'AfetoPositivo', level=0.95)
## 2.5 % 97.5 %
## AfetoPositivo 0.2824601 1.156148
confint(fit1, 'AfetoNegativo', level=0.95)
## 2.5 % 97.5 %
## AfetoNegativo -0.8640988 0.07053355
confint(fit1, '(Intercept)', level=0.95)
## 2.5 % 97.5 %
## (Intercept) 105.551 139.1187
#Fit Model
fit2 <- lm(FFMQTOTAL ~ FAGERTRONTOTAL + HADansiedade + HADdepressao, data=MBRP_regression3)
#Model Summary
summary(fit2) # show results
##
## Call:
## lm(formula = FFMQTOTAL ~ FAGERTRONTOTAL + HADansiedade + HADdepressao,
## data = MBRP_regression3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.5865 -7.2839 0.1823 8.4676 29.2764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 141.4974 4.5904 30.825 < 2e-16 ***
## FAGERTRONTOTAL -1.4356 0.5981 -2.401 0.01853 *
## HADansiedade 0.1029 0.4274 0.241 0.81027
## HADdepressao -1.4012 0.3772 -3.715 0.00036 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.64 on 86 degrees of freedom
## Multiple R-squared: 0.2671, Adjusted R-squared: 0.2415
## F-statistic: 10.45 on 3 and 86 DF, p-value: 6.263e-06
#Anova
anova(fit2) # anova table
## Analysis of Variance Table
##
## Response: FFMQTOTAL
## Df Sum Sq Mean Sq F value Pr(>F)
## FAGERTRONTOTAL 1 1505.8 1505.8 11.1048 0.0012710 **
## HADansiedade 1 871.9 871.9 6.4301 0.0130276 *
## HADdepressao 1 1871.6 1871.7 13.8032 0.0003602 ***
## Residuals 86 11661.2 135.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Standartized Coeficients
fit.beta2 <- lm.beta(fit2)
#Results
fit.beta2
## FAGERTRONTOTAL HADansiedade HADdepressao
## -0.23625971 0.02980471 -0.43929970
confint(fit2, 'FAGERTRONTOTAL', level=0.95)
## 2.5 % 97.5 %
## FAGERTRONTOTAL -2.624524 -0.2467415
confint(fit2, 'HADansiedade', level=0.95)
## 2.5 % 97.5 %
## HADansiedade -0.7467041 0.9525502
confint(fit2, 'HADdepressao', level=0.95)
## 2.5 % 97.5 %
## HADdepressao -2.151011 -0.6514769
confint(fit2, '(Intercept)', level=0.95)
## 2.5 % 97.5 %
## (Intercept) 132.3721 150.6228
#Model with all variables p<0.10
fit <- lm(FFMQTOTAL ~ FAGERTRONTOTAL + AfetoPositivo + AfetoNegativo , data=MBRP_regression3)
fit.beta <- lm.beta(fit)
confint(fit, 'FAGERTRONTOTAL', level=0.95)
## 2.5 % 97.5 %
## FAGERTRONTOTAL -2.536956 -0.4167065
confint(fit, 'AfetoNegativo', level=0.95)
## 2.5 % 97.5 %
## AfetoNegativo -0.8186923 -0.05906136
confint(fit, 'AfetoPositivo', level=0.95)
## 2.5 % 97.5 %
## AfetoPositivo 0.4752086 1.16142
confint(fit, '(Intercept)', level=0.95)
## 2.5 % 97.5 %
## (Intercept) 105.007 133.7337
#Model Summary
summary(fit, corr=T) # show results
##
## Call:
## lm(formula = FFMQTOTAL ~ FAGERTRONTOTAL + AfetoPositivo + AfetoNegativo,
## data = MBRP_regression3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.1230 -7.5704 0.6185 9.1805 19.9789
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 119.3703 7.2252 16.521 < 2e-16 ***
## FAGERTRONTOTAL -1.4768 0.5333 -2.769 0.00688 **
## AfetoPositivo 0.8183 0.1726 4.741 8.35e-06 ***
## AfetoNegativo -0.4389 0.1911 -2.297 0.02404 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.87 on 86 degrees of freedom
## Multiple R-squared: 0.3618, Adjusted R-squared: 0.3395
## F-statistic: 16.25 on 3 and 86 DF, p-value: 1.884e-08
##
## Correlation of Coefficients:
## (Intercept) FAGERTRONTOTAL AfetoPositivo
## FAGERTRONTOTAL -0.37
## AfetoPositivo -0.75 0.01
## AfetoNegativo -0.63 -0.18 0.26
fit.beta
## FAGERTRONTOTAL AfetoPositivo AfetoNegativo
## -0.2430397 0.4241293 -0.2089687
#Anova
anova(fit) # anova table
## Analysis of Variance Table
##
## Response: FFMQTOTAL
## Df Sum Sq Mean Sq F value Pr(>F)
## FAGERTRONTOTAL 1 1505.8 1505.8 12.7521 0.0005851 ***
## AfetoPositivo 1 3626.9 3626.9 30.7161 3.215e-07 ***
## AfetoNegativo 1 623.0 623.0 5.2765 0.0240448 *
## Residuals 86 10154.8 118.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# diagnostic plots
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
print(plot(fit))
## NULL
# Calculate Relative Importance for Each Predictor R^2 contribution averaged over orderings among regressors
calc.relimp(fit,type=c("lmg"),
rela=TRUE)
## Response variable: FFMQTOTAL
## Total response variance: 178.7697
## Analysis based on 90 observations
##
## 3 Regressors:
## FAGERTRONTOTAL AfetoPositivo AfetoNegativo
## Proportion of variance explained by model: 36.18%
## Metrics are normalized to sum to 100% (rela=TRUE).
##
## Relative importance metrics:
##
## lmg
## FAGERTRONTOTAL 0.2024682
## AfetoPositivo 0.5616697
## AfetoNegativo 0.2358621
##
## Average coefficients for different model sizes:
##
## 1X 2Xs 3Xs
## FAGERTRONTOTAL -1.8693432 -1.5976500 -1.4768311
## AfetoPositivo 0.9542459 0.8723013 0.8183143
## AfetoNegativo -0.7761034 -0.6063582 -0.4388768
# Assessing Outliers
outlierTest(fit) # Bonferonni p-value for most extreme obs
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 37 -2.429909 0.017207 NA
qqPlot(fit, main="QQ Plot") #qq plot for studentized resid
leveragePlots(fit) # leverage plots
# Global test of model assumptions
gvmodel <- gvlma(fit)
summary(gvmodel)
##
## Call:
## lm(formula = FFMQTOTAL ~ FAGERTRONTOTAL + AfetoPositivo + AfetoNegativo,
## data = MBRP_regression3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.1230 -7.5704 0.6185 9.1805 19.9789
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 119.3703 7.2252 16.521 < 2e-16 ***
## FAGERTRONTOTAL -1.4768 0.5333 -2.769 0.00688 **
## AfetoPositivo 0.8183 0.1726 4.741 8.35e-06 ***
## AfetoNegativo -0.4389 0.1911 -2.297 0.02404 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.87 on 86 degrees of freedom
## Multiple R-squared: 0.3618, Adjusted R-squared: 0.3395
## F-statistic: 16.25 on 3 and 86 DF, p-value: 1.884e-08
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit)
##
## Value p-value Decision
## Global Stat 2.28334 0.6838 Assumptions acceptable.
## Skewness 0.49209 0.4830 Assumptions acceptable.
## Kurtosis 1.75077 0.1858 Assumptions acceptable.
## Link Function 0.01030 0.9192 Assumptions acceptable.
## Heteroscedasticity 0.03018 0.8621 Assumptions acceptable.
# identify D values > 4/(n-k-1)
cutoff <- 4/((nrow(MBRP_regression3)-length(fit$coefficients)-2))
plot(fit, which=4, cook.levels=cutoff)
# Influence Plot
influencePlot(fit, id.method="identify", main="Influence Plot", sub="Circle size is proportial to Cook's Distance" )
# Evaluate Collinearity
vif(fit) # variance inflation factors
## FAGERTRONTOTAL AfetoPositivo AfetoNegativo
## 1.037801 1.078251 1.115142
sqrt(vif(fit)) > 2 # problem?
## FAGERTRONTOTAL AfetoPositivo AfetoNegativo
## FALSE FALSE FALSE
# Test for Autocorrelated Errors
durbinWatsonTest(fit)
## lag Autocorrelation D-W Statistic p-value
## 1 0.0296727 1.929834 0.77
## Alternative hypothesis: rho != 0
# Evaluate Nonlinearity
# component + residual plot
crPlots(fit)
# Ceres plots
ceresPlots(fit)
# Evaluate homoscedasticity
# non-constant error variance test
ncvTest(fit)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.008645663 Df = 1 p = 0.9259178
# plot studentized residuals vs. fitted values
spreadLevelPlot(fit)
##
## Suggested power transformation: -0.1744018
## Normality of Residuals
# qq plot for studentized resid
qqPlot(fit, main="QQ Plot")
# distribution of studentized residuals
sresid <- studres(fit)
hist(sresid, freq=FALSE, main="Distribution of Studentized Residuals")
xfit<-seq(min(sresid),max(sresid),length=40)
yfit<-dnorm(xfit)
lines(xfit, yfit)