library(readxl)
library(ggplot2)
library(CGPfunctions)
library(ggplot2)
library(plotly)
library(lmtest)
#Definir directorio
setwd("G:/TRABAJO/CONSULTORIAS/TRABAJOS VARIOS/JORGE CHAVARRIA")
data1 = read_excel("Data_Reg_Stent_Expansion.xlsx")
head(data1,5)
Descripcion variables * VAR_119: Relative Stent Expansion % * VAR_120: Absolute stent expansion * Post_Dilatation [0:N, 1:Y] * VAR_56: CONTRAST CALCIUM VOLUME * VAR_5: Type of Bicuspid AV [0: Raphe Non Calcified; 1:Raphe calcified] * VAR_82_GROUP: Raphe Calcified GROUP [0:N - Raphe Calcified;1:Y - Raphe Calcified] * VAR_CALC1: SV MEAN DIAMETER average/Annulus diameter average * VAR_CALC2: SV D min/Annulus diameter average * VAR_CALC3: SV D max/Annulus diameter average * VAR_62:VOLUME OF CALCIUM AT THE LEVEL OF THE RAPHE
#Relacion entre CONTRAST CALCIUM VOLUME vs Relative Stent Expansion %
g1=ggplot(data=data1,mapping=
aes(x=VAR_56,y=VAR_119))+geom_point()+theme_bw()+
geom_smooth()
ggplotly(g1)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Relacion entre Post_Dilatation vs Relative Stent Expansion %
boxplot(data1$VAR_119~data1$Post_Dilatation,
xlab = 'Relative Stent Expansion %',
ylab = 'Post_Dilatation',
col= 'azure')
#Relacion entre Type of Bicuspid AV vs Relative Stent Expansion %
boxplot(data1$VAR_119~data1$VAR_5,
xlab = 'Type of Bicuspid AV',
ylab = 'Relative Stent Expansion %',
col= 'bisque')
#Relacion entre Raphe Calcified GROUP vs Relative Stent Expansion %
boxplot(data1$VAR_119~data1$VAR_82_GROUP,
xlab = 'Raphe Calcified GROUP',
ylab = 'Relative Stent Expansion %',
col= 'ivory')
#Relacion entre SV MEAN DIAMETER average/Annulus diameter average/Annulus diameter average vs Relative Stent Expansion %
g2=ggplot(data=data1,mapping=
aes(x=VAR_CALC1,y=VAR_119))+geom_point()+theme_bw()+
geom_smooth()
ggplotly(g2)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Relacion entre SV D min/Annulus diameter average vs Relative Stent Expansion %
g3=ggplot(data=data1,mapping=
aes(x=VAR_CALC2,y=VAR_119))+geom_point()+theme_bw()+
geom_smooth()
ggplotly(g3)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Relacion entre SV D max/Annulus diameter average vs Relative Stent Expansion %
g4=ggplot(data=data1,mapping=
aes(x=VAR_CALC3,y=VAR_119))+geom_point()+theme_bw()+
geom_smooth()
ggplotly(g4)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Función para agregar coeficientes de correlación
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
Cor <- abs(cor(x, y)) # Elimina la función abs si lo prefieres
txt <- paste0(prefix, format(c(Cor, 0.123456789), digits = digits)[1])
if(missing(cex.cor)) {
cex.cor <- 0.4 / strwidth(txt)
}
text(0.5, 0.5, txt,
cex = 1 + cex.cor * Cor) # Escala el texto al nivel de correlación
}
#MATRIZ DE CORRELACION.
pairs(~ VAR_119 + VAR_120 + Post_Dilatation + VAR_56 +
VAR_5 + VAR_82_GROUP + VAR_CALC1 + VAR_CALC2 +
VAR_CALC3 + VAR_62,
upper.panel = panel.cor,
lower.panel = panel.smooth,
data = data1)
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
Warning in par(usr) : argument 1 does not name a graphical parameter
#Iteracion Inicial
mod1=lm(VAR_119 ~ Post_Dilatation + VAR_56 + VAR_5 +VAR_82_GROUP +
VAR_CALC1 + VAR_CALC2 + VAR_CALC3 + VAR_62,
data = data1)
summary(mod1)
Call:
lm(formula = VAR_119 ~ Post_Dilatation + VAR_56 + VAR_5 + VAR_82_GROUP +
VAR_CALC1 + VAR_CALC2 + VAR_CALC3 + VAR_62, data = data1)
Residuals:
Min 1Q Median 3Q Max
-8.4386 -1.5012 0.2836 1.8424 5.4278
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 86.5068344 3.8378950 22.540 < 2e-16 ***
Post_Dilatation 2.9556786 0.7376701 4.007 0.000126 ***
VAR_56 0.0003394 0.0005693 0.596 0.552470
VAR_5 -0.1006347 0.6735126 -0.149 0.881555
VAR_82_GROUP -0.3960602 0.7482925 -0.529 0.597896
VAR_CALC1 4.2960017 5.8714740 0.732 0.466248
VAR_CALC2 0.6558248 3.9060393 0.168 0.867034
VAR_CALC3 -1.7584037 4.2352667 -0.415 0.678986
VAR_62 -0.0023952 0.0016867 -1.420 0.159014
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.845 on 91 degrees of freedom
Multiple R-squared: 0.1672, Adjusted R-squared: 0.09401
F-statistic: 2.284 on 8 and 91 DF, p-value: 0.02821
Conclusion 1: Realizar una regresion directamente no arroja buenos resultados, se procede a estandarizar.
library(dplyr)
set.seed(1)
data2 <- data1 %>% mutate_at(c('Post_Dilatation','VAR_56','VAR_5',
'VAR_82_GROUP','VAR_CALC1',
'VAR_CALC2','VAR_CALC3','VAR_62'), ~(scale(.) %>% as.vector))
head(data2)
#Iteracion data estandarizada
mod2=lm(VAR_119 ~ Post_Dilatation + VAR_56 + VAR_5 +VAR_82_GROUP +
VAR_CALC1 + VAR_CALC2 + VAR_CALC3 + VAR_62,
data = data2)
summary(mod2)
Call:
lm(formula = VAR_119 ~ Post_Dilatation + VAR_56 + VAR_5 + VAR_82_GROUP +
VAR_CALC1 + VAR_CALC2 + VAR_CALC3 + VAR_62, data = data2)
Residuals:
Min 1Q Median 3Q Max
-8.4386 -1.5012 0.2836 1.8424 5.4278
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 90.68210 0.28452 318.722 < 2e-16 ***
Post_Dilatation 1.23055 0.30712 4.007 0.000126 ***
VAR_56 0.21586 0.36202 0.596 0.552470
VAR_5 -0.04933 0.33016 -0.149 0.881555
VAR_82_GROUP -0.17672 0.33389 -0.529 0.597896
VAR_CALC1 0.46341 0.63336 0.732 0.466248
VAR_CALC2 0.07470 0.44492 0.168 0.867034
VAR_CALC3 -0.21882 0.52705 -0.415 0.678986
VAR_62 -0.49909 0.35147 -1.420 0.159014
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.845 on 91 degrees of freedom
Multiple R-squared: 0.1672, Adjusted R-squared: 0.09401
F-statistic: 2.284 on 8 and 91 DF, p-value: 0.02821
Continuamos sin resultados satisfactorios, pasamos a modelar VAR_120.
#TRrasnformacion sobre VAR_120
mod3=lm(log(VAR_120) ~ Post_Dilatation + VAR_56 + VAR_5 + VAR_82_GROUP +
VAR_CALC1 + VAR_CALC2 + VAR_CALC3 + VAR_62,
data = data1)
summary(mod3)
Call:
lm(formula = log(VAR_120) ~ Post_Dilatation + VAR_56 + VAR_5 +
VAR_82_GROUP + VAR_CALC1 + VAR_CALC2 + VAR_CALC3 + VAR_62,
data = data1)
Residuals:
Min 1Q Median 3Q Max
-0.29818 -0.12138 0.01193 0.11499 0.41310
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.630e+00 2.111e-01 26.666 <2e-16 ***
Post_Dilatation 6.602e-03 4.058e-02 0.163 0.8711
VAR_56 7.283e-05 3.132e-05 2.326 0.0223 *
VAR_5 1.053e-02 3.705e-02 0.284 0.7768
VAR_82_GROUP -1.086e-02 4.117e-02 -0.264 0.7925
VAR_CALC1 2.404e-01 3.230e-01 0.744 0.4586
VAR_CALC2 -5.043e-01 2.149e-01 -2.347 0.0211 *
VAR_CALC3 6.027e-02 2.330e-01 0.259 0.7965
VAR_62 4.639e-05 9.279e-05 0.500 0.6183
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1565 on 91 degrees of freedom
Multiple R-squared: 0.1996, Adjusted R-squared: 0.1293
F-statistic: 2.837 on 8 and 91 DF, p-value: 0.007378
Se obtienen variables significativas, procedemos a optimizar la seleccion de variables.
library(MASS)
# Stepwise para mejorar la seleccion de variables
step.model <- stepAIC(mod3, direction = "both",
trace = FALSE)
summary(step.model)
Call:
lm(formula = log(VAR_120) ~ VAR_56 + VAR_CALC1 + VAR_CALC2, data = data1)
Residuals:
Min 1Q Median 3Q Max
-0.29437 -0.12264 0.01924 0.12164 0.42597
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.642e+00 1.992e-01 28.326 < 2e-16 ***
VAR_56 8.138e-05 2.503e-05 3.251 0.00159 **
VAR_CALC1 3.201e-01 2.124e-01 1.507 0.13500
VAR_CALC2 -5.226e-01 2.034e-01 -2.570 0.01172 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1528 on 96 degrees of freedom
Multiple R-squared: 0.1955, Adjusted R-squared: 0.1704
F-statistic: 7.776 on 3 and 96 DF, p-value: 0.0001058
Se obtiene un modelo significativo aunque su capacidad de prediccion es baja
#Se toman resultados del step y se fuerza la inclusion de Post_Dilatation
mod4=lm(log(VAR_120) ~ Post_Dilatation + VAR_56 + VAR_CALC1 + VAR_CALC2, data = data1)
summary(mod4)
Call:
lm(formula = log(VAR_120) ~ Post_Dilatation + VAR_56 + VAR_CALC1 +
VAR_CALC2, data = data1)
Residuals:
Min 1Q Median 3Q Max
-0.29260 -0.12207 0.02141 0.12285 0.42830
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.640e+00 2.004e-01 28.146 < 2e-16 ***
Post_Dilatation 9.992e-03 3.710e-02 0.269 0.78824
VAR_56 8.137e-05 2.516e-05 3.235 0.00167 **
VAR_CALC1 3.223e-01 2.136e-01 1.509 0.13452
VAR_CALC2 -5.248e-01 2.046e-01 -2.566 0.01186 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1535 on 95 degrees of freedom
Multiple R-squared: 0.1961, Adjusted R-squared: 0.1623
F-statistic: 5.794 on 4 and 95 DF, p-value: 0.0003239
Se continua con el step model logrado
par(mfrow=c(2,2))
plot(step.model)
shapiro.test(step.model$residuals)
Shapiro-Wilk normality test
data: step.model$residuals
W = 0.97178, p-value = 0.03029
#Cumple con la normalidad
bptest(step.model)
studentized Breusch-Pagan test
data: step.model
BP = 3.3635, df = 3, p-value = 0.3389
#NO Cumple con heterocedasticidad
dwtest( step.model,
alternative = "two.sided",
data = data1)
Durbin-Watson test
data: step.model
DW = 1.9405, p-value = 0.7571
alternative hypothesis: true autocorrelation is not 0
#Cumple con indepencia en los residuos
CONCLUSION FINAL: Logramos obtener un modelo significativo de baja capacidad predictiva.