library(readxl)
library(ggplot2)
library(CGPfunctions)
library(plotly)
library(lmtest)
# Point the session at the project folder so the relative file name below
# resolves there
setwd("G:/TRABAJO/CONSULTORIAS/TRABAJOS VARIOS/JORGE CHAVARRIA/analisis3")
# Load the study workbook and preview its first five rows
data1 <- read_excel(path = "Bicuspid3.xlsx")
head(data1, n = 5)
ANÁLISIS DE ELLIPTICITY
# Relationship between Maximum Sinus Diameter Indexed and Ellipticity:
# scatter plot with a fitted linear trend line
g1 <- ggplot(data1, aes(x = MaxSinAnnDcalc, y = Ellipticity)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm")
g1 +
  labs(
    title = "Ellipticity vs Maximum Sinus Diameter Indexed",
    x = "Maximum Sinus Diameter Indexed",
    y = "Ellipticity"
  )
`geom_smooth()` using formula 'y ~ x'
# Pearson correlation test between MaxSinAnnDcalc and Ellipticity
correlation_test1 <- cor.test(
  x = data1$MaxSinAnnDcalc,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test1)
Pearson's product-moment correlation
data: data1$MaxSinAnnDcalc and data1$Ellipticity
t = -0.037089, df = 97, p-value = 0.9705
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.2010282 0.1937901
sample estimates:
cor
-0.003765816
Podemos observar que la correlación entre estas variables es prácticamente nula (r = -0.004, p = 0.9705).
# Relationship between Minimum Sinus Diameter Indexed and Ellipticity:
# scatter plot with a fitted linear trend line
g2 <- ggplot(data1, aes(x = MinSinusAnnDcalc, y = Ellipticity)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm")
g2 +
  labs(
    title = "Ellipticity vs Minimun Sinus Diameter Indexed",
    x = "Minimun Sinus Diameter Indexed",
    y = "Ellipticity"
  )
`geom_smooth()` using formula 'y ~ x'
# Pearson correlation test between MinSinusAnnDcalc and Ellipticity
correlation_test2 <- cor.test(
  x = data1$MinSinusAnnDcalc,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test2)
Pearson's product-moment correlation
data: data1$MinSinusAnnDcalc and data1$Ellipticity
t = 0.54578, df = 97, p-value = 0.5865
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1436501 0.2500118
sample estimates:
cor
0.05533081
Tampoco existe correlación significativa entre estas variables (r = 0.055, p = 0.5865).
# Relationship between Postdilation and Ellipticity:
# box plot per group followed by a two-sample t-test
boxplot(
  Ellipticity ~ Postdilation,
  data = data1,
  xlab = "Postdilation",
  ylab = "Ellipticity",
  # boxplot() takes its heading through `main`; `title` is not one of its
  # arguments, so the heading was not being applied
  main = "Postdilatation vs Ellipticity",
  col = "bisque"
)
# The grouping column is converted to a factor so it is treated as two groups
t_test1 <- t.test(data1$Ellipticity ~ as.factor(data1$Postdilation))
print(t_test1)
Welch Two Sample t-test
data: data1$Ellipticity by as.factor(data1$Postdilation)
t = 0.95701, df = 28.291, p-value = 0.3467
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
-0.01228230 0.03384074
sample estimates:
mean in group 0 mean in group 1
1.097143 1.086364
No se evidencia una diferencia significativa en la Ellipticity en función de la Postdilatation (p = 0.3467).
# Overlaid density curves of Ellipticity, one per Postdilation group.
# `bins` is not a geom_density() parameter (it was ignored with a warning),
# so it is left out here.
ggplot(data = data1) +
  geom_density(
    aes(x = Ellipticity, fill = factor(Postdilation)),
    position = "identity",
    alpha = 0.5
  )
Warning: Ignoring unknown parameters: bins
# Relationship between Predilatation and Ellipticity:
# box plot per group followed by a two-sample t-test
boxplot(
  Ellipticity ~ Predilatation,
  data = data1,
  xlab = "Predilatation",
  ylab = "Ellipticity",
  # boxplot() takes its heading through `main`; `title` is not one of its
  # arguments, so the heading was not being applied
  main = "Predilatation vs Ellipticity",
  col = "bisque"
)
# The grouping column is converted to a factor so it is treated as two groups
t_test2 <- t.test(data1$Ellipticity ~ as.factor(data1$Predilatation))
print(t_test2)
Welch Two Sample t-test
data: data1$Ellipticity by as.factor(data1$Predilatation)
t = -0.28948, df = 56.74, p-value = 0.7733
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
-0.02078043 0.01553156
sample estimates:
mean in group 0 mean in group 1
1.093846 1.096471
No existe una diferencia significativa de Ellipticity en función de la Predilatation
# Overlaid density curves of Ellipticity, one per Predilatation group.
# `bins` is not a geom_density() parameter (it was ignored with a warning),
# so it is left out here.
ggplot(data = data1) +
  geom_density(
    aes(x = Ellipticity, fill = factor(Predilatation)),
    position = "identity",
    alpha = 0.5
  )
Warning: Ignoring unknown parameters: bins
# Relationship between Raphe Calcification and Ellipticity:
# box plot per group, then overlaid density curves per group
boxplot(
  Ellipticity ~ Raphaecalcification,
  data = data1,
  xlab = "Raphae Calcification",
  ylab = "Ellipticity",
  # boxplot() takes its heading through `main`; `title` is not one of its
  # arguments, so the heading was not being applied
  main = "Raphae Calcification vs Ellipticity",
  col = "ivory"
)
# `bins` is not a geom_density() parameter (it was ignored with a warning),
# so it is left out here.
ggplot(data = data1) +
  geom_density(
    aes(x = Ellipticity, fill = factor(Raphaecalcification)),
    position = "identity",
    alpha = 0.5
  )
Warning: Ignoring unknown parameters: bins
# Two-sample t-test of Ellipticity across the Raphaecalcification groups
t_test3 <- t.test(
  formula = data1$Ellipticity ~ as.factor(data1$Raphaecalcification)
)
print(t_test3)
Welch Two Sample t-test
data: data1$Ellipticity by as.factor(data1$Raphaecalcification)
t = -2.3326, df = 89.761, p-value = 0.02191
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
-0.033216625 -0.002659574
sample estimates:
mean in group 0 mean in group 1
1.083514 1.101452
Existe una diferencia significativa de Ellipticity en función de Raphaecalcification (p = 0.02191).
# Relationship between AVAi and Ellipticity:
# scatter plot with a fitted linear trend line
g4 <- ggplot(data1, aes(x = AVAi, y = Ellipticity)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm")
g4 +
  labs(
    title = "Ellipticity vs AVAi",
    x = "AVAi",
    y = "Ellipticity"
  )
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 24 rows containing non-finite values (stat_smooth).
Warning: Removed 24 rows containing missing values (geom_point).
# Pearson correlation test between AVAi and Ellipticity
correlation_test3 <- cor.test(
  x = data1$AVAi,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test3)
Pearson's product-moment correlation
data: data1$AVAi and data1$Ellipticity
t = -1.0448, df = 73, p-value = 0.2996
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3390060 0.1085693
sample estimates:
cor
-0.1213836
No existe correlación significativa entre AVAi y Ellipticity (r = -0.121, p = 0.2996).
# Relationship between Mean Gradient (mmHg) and Ellipticity:
# scatter plot with a fitted linear trend line
g5 <- ggplot(data1, aes(x = MeanGradientmmHg, y = Ellipticity)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm")
g5 +
  labs(
    title = "Ellipticity vs Mean Gradient (mmHg)",
    x = "Mean Gradient (mmHg)",
    y = "Ellipticity"
  )
`geom_smooth()` using formula 'y ~ x'
# Pearson correlation test between MeanGradientmmHg and Ellipticity
correlation_test4 <- cor.test(
  x = data1$MeanGradientmmHg,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test4)
Pearson's product-moment correlation
data: data1$MeanGradientmmHg and data1$Ellipticity
t = 1.8065, df = 97, p-value = 0.07394
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.01762871 0.36482930
sample estimates:
cor
0.180411
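La correlación entre MeanGradientmmHg y Ellipticity es positiva pero débil (r = 0.18) y no alcanza significación estadística al 5% (p = 0.07394; el intervalo de confianza del 95% incluye el cero).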
# Relationship between ICD4mm and Ellipticity:
# scatter plot with a fitted linear trend line
g6 <- ggplot(data1, aes(x = ICD4mm, y = Ellipticity)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm")
g6 +
  labs(
    title = "Ellipticity vs Intercomisural Diameter at 4 mm Indexed",
    x = "Intercomisural Diameter at 4 mm Indexed",
    y = "Ellipticity"
  )
`geom_smooth()` using formula 'y ~ x'
# Pearson correlation test between ICD4mm and Ellipticity
correlation_test5 <- cor.test(
  x = data1$ICD4mm,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test5)
Pearson's product-moment correlation
data: data1$ICD4mm and data1$Ellipticity
t = 0.10506, df = 97, p-value = 0.9165
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1871391 0.2076414
sample estimates:
cor
0.01066681
No existe correlación significativa entre ICD4mm y Ellipticity (r = 0.011, p = 0.9165).
# Relationship between CCV and Ellipticity:
# scatter plot with a fitted linear trend line
g7 <- ggplot(data1, aes(x = CCV, y = Ellipticity)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm")
g7 +
  labs(
    title = "Ellipticity vs Calcium contrast volume",
    x = "Calcium contrast volume",
    y = "Ellipticity"
  )
`geom_smooth()` using formula 'y ~ x'
# Pearson correlation test between CCV and Ellipticity
correlation_test6 <- cor.test(
  x = data1$CCV,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test6)
Pearson's product-moment correlation
data: data1$CCV and data1$Ellipticity
t = 1.0569, df = 97, p-value = 0.2932
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.09266871 0.29783359
sample estimates:
cor
0.106695
La correlación entre CCV y Ellipticity no es significativa (r = 0.107, p = 0.2932).
# Relationship between CCV and Ellipticitycalc:
# box plot of CCV for each Ellipticitycalc group
boxplot(
  CCV ~ Ellipticitycalc,
  data = data1,
  xlab = "Ellipticity",
  ylab = "Calcium contrast volume",
  # boxplot() takes its heading through `main`; `title` is not one of its
  # arguments, so the heading was not being applied
  main = "Ellipticitycalc vs Calcium contrast volume",
  col = "ivory"
)
REGRESIÓN LINEAL EN FUNCIÓN DE Ellipticity
# List the available columns before choosing model predictors
names(data1)
[1] "StudyIDglobal" "RelativeStentExpansion" "RSEcalc" "Ellipticity"
[5] "Ellipticitycalc" "MaxSinAnnDcalc" "MinSinusAnnDcalc" "Predilatation"
[9] "Postdilation" "CCV" "Raphaecalcification" "AVAi"
[13] "MeanGradientmmHg" "ICD4mm" "ADDiameter" "ICD4mm_calc"
[17] "SVDmax" "SVDmin"
# Initial linear regression of Ellipticity on the candidate predictors
mod1 <- lm(
  formula = Ellipticity ~ Predilatation + Postdilation + CCV +
    Raphaecalcification + ICD4mm + SVDmax,
  data = data1
)
summary(mod1)
Call:
lm(formula = Ellipticity ~ Predilatation + Postdilation + CCV +
Raphaecalcification + ICD4mm + SVDmax, data = data1)
Residuals:
Min 1Q Median 3Q Max
-0.07665 -0.02952 -0.00092 0.02322 0.11700
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.119e+00 4.281e-02 26.136 <2e-16 ***
Predilatation 7.993e-04 8.773e-03 0.091 0.9276
Postdilation -9.231e-03 9.642e-03 -0.957 0.3409
CCV 6.266e-06 7.988e-06 0.784 0.4349
Raphaecalcification 1.648e-02 8.671e-03 1.901 0.0605 .
ICD4mm 1.915e-03 2.116e-03 0.905 0.3678
SVDmax -2.460e-03 1.293e-03 -1.902 0.0602 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0393 on 92 degrees of freedom
Multiple R-squared: 0.09752, Adjusted R-squared: 0.03866
F-statistic: 1.657 on 6 and 92 DF, p-value: 0.1406
# Keep the model summary so its coefficient table can be reused below
summary_mod1 <- summary(mod1)
coef_table_mod1 <- coef(summary_mod1)
# Point estimates: the "Estimate" column of the coefficient table
coefficients <- coef_table_mod1[, "Estimate"]
# Confidence intervals at confint()'s default level
conf_intervals <- confint(mod1)
# p-values: the "Pr(>|t|)" column of the coefficient table
p_values <- coef_table_mod1[, "Pr(>|t|)"]
# One row per model term: estimate, interval bounds and p-value
consolidado <- data.frame(
  Coefficient = coefficients,
  Lower_CI = conf_intervals[, 1],
  Upper_CI = conf_intervals[, 2],
  p_value = p_values
)
print(consolidado)
NA
# Stepwise, AIC-based term selection with step(), starting from mod1
mod1_1 <- step(object = mod1)
Start: AIC=-634.08
Ellipticity ~ Predilatation + Postdilation + CCV + Raphaecalcification +
ICD4mm + SVDmax
Df Sum of Sq RSS AIC
- Predilatation 1 0.0000128 0.14213 -636.07
- CCV 1 0.0009503 0.14306 -635.42
- ICD4mm 1 0.0012651 0.14338 -635.20
- Postdilation 1 0.0014159 0.14353 -635.10
<none> 0.14211 -634.08
- Raphaecalcification 1 0.0055811 0.14769 -632.27
- SVDmax 1 0.0055905 0.14770 -632.26
Step: AIC=-636.07
Ellipticity ~ Postdilation + CCV + Raphaecalcification + ICD4mm +
SVDmax
Df Sum of Sq RSS AIC
- CCV 1 0.0010521 0.14318 -637.34
- ICD4mm 1 0.0012618 0.14339 -637.20
- Postdilation 1 0.0014094 0.14353 -637.09
<none> 0.14213 -636.07
- SVDmax 1 0.0055784 0.14770 -634.26
- Raphaecalcification 1 0.0056006 0.14773 -634.24
Step: AIC=-637.34
Ellipticity ~ Postdilation + Raphaecalcification + ICD4mm + SVDmax
Df Sum of Sq RSS AIC
- Postdilation 1 0.0012544 0.14443 -638.48
- ICD4mm 1 0.0023359 0.14551 -637.74
<none> 0.14318 -637.34
- SVDmax 1 0.0050845 0.14826 -635.89
- Raphaecalcification 1 0.0080923 0.15127 -633.90
Step: AIC=-638.48
Ellipticity ~ Raphaecalcification + ICD4mm + SVDmax
Df Sum of Sq RSS AIC
- ICD4mm 1 0.0029232 0.14736 -638.49
<none> 0.14443 -638.48
- SVDmax 1 0.0055768 0.15001 -636.73
- Raphaecalcification 1 0.0083392 0.15277 -634.92
Step: AIC=-638.49
Ellipticity ~ Raphaecalcification + SVDmax
Df Sum of Sq RSS AIC
- SVDmax 1 0.0026574 0.15001 -638.72
<none> 0.14736 -638.49
- Raphaecalcification 1 0.0082956 0.15565 -635.07
Step: AIC=-638.72
Ellipticity ~ Raphaecalcification
Df Sum of Sq RSS AIC
<none> 0.15001 -638.72
- Raphaecalcification 1 0.0074561 0.15747 -635.92
# Report the linear model retained by the stepwise procedure
summary(object = mod1_1)
Call:
lm(formula = Ellipticity ~ Raphaecalcification, data = data1)
Residuals:
Min 1Q Median 3Q Max
-0.071452 -0.032483 -0.001452 0.027517 0.108548
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.083514 0.006465 167.594 <2e-16 ***
Raphaecalcification 0.017938 0.008170 2.196 0.0305 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.03933 on 97 degrees of freedom
Multiple R-squared: 0.04735, Adjusted R-squared: 0.03753
F-statistic: 4.821 on 1 and 97 DF, p-value: 0.0305
REGRESIÓN LOGÍSTICA EN FUNCIÓN DE Ellipticitycalc (1; 0)
# Binary-response model: binomial GLM of Ellipticitycalc on the same
# candidate predictors used in the linear model
mod2 <- glm(
  formula = Ellipticitycalc ~ Predilatation + Postdilation + CCV +
    Raphaecalcification + ICD4mm + SVDmax,
  family = "binomial",
  data = data1
)
summary(mod2)
Call:
glm(formula = Ellipticitycalc ~ Predilatation + Postdilation +
CCV + Raphaecalcification + ICD4mm + SVDmax, family = "binomial",
data = data1)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.5372 -1.0507 -0.6165 1.0619 1.8547
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.2145393 2.3226103 0.092 0.92640
Predilatation -0.2414236 0.4799513 -0.503 0.61495
Postdilation -0.3921355 0.5311670 -0.738 0.46036
CCV 0.0003218 0.0004496 0.716 0.47407
Raphaecalcification 0.5690322 0.4702630 1.210 0.22627
ICD4mm 0.2493679 0.1170813 2.130 0.03318 *
SVDmax -0.2091710 0.0764831 -2.735 0.00624 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 136.75 on 98 degrees of freedom
Residual deviance: 124.68 on 92 degrees of freedom
AIC: 138.68
Number of Fisher Scoring iterations: 4
# Stepwise, AIC-based term selection with step(), starting from mod2
mod2_1 <- step(object = mod2)
Start: AIC=138.68
Ellipticitycalc ~ Predilatation + Postdilation + CCV + Raphaecalcification +
ICD4mm + SVDmax
Df Deviance AIC
- Predilatation 1 124.94 136.94
- CCV 1 125.20 137.20
- Postdilation 1 125.23 137.24
- Raphaecalcification 1 126.17 138.17
<none> 124.68 138.68
- ICD4mm 1 129.45 141.45
- SVDmax 1 133.22 145.22
Step: AIC=136.94
Ellipticitycalc ~ Postdilation + CCV + Raphaecalcification +
ICD4mm + SVDmax
Df Deviance AIC
- CCV 1 125.32 135.32
- Postdilation 1 125.53 135.53
- Raphaecalcification 1 126.39 136.39
<none> 124.94 136.94
- ICD4mm 1 129.68 139.68
- SVDmax 1 133.62 143.62
Step: AIC=135.32
Ellipticitycalc ~ Postdilation + Raphaecalcification + ICD4mm +
SVDmax
Df Deviance AIC
- Postdilation 1 125.84 133.84
<none> 125.32 135.32
- Raphaecalcification 1 127.53 135.53
- ICD4mm 1 131.43 139.43
- SVDmax 1 133.66 141.66
Step: AIC=133.84
Ellipticitycalc ~ Raphaecalcification + ICD4mm + SVDmax
Df Deviance AIC
<none> 125.84 133.84
- Raphaecalcification 1 128.14 134.14
- ICD4mm 1 132.64 138.64
- SVDmax 1 134.62 140.62
# Report the binomial model retained by the stepwise procedure
summary(object = mod2_1)
Call:
glm(formula = Ellipticitycalc ~ Raphaecalcification + ICD4mm +
SVDmax, family = "binomial", data = data1)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6529 -1.0435 -0.6456 1.0645 1.7298
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.64810 2.02707 -0.320 0.74918
Raphaecalcification 0.66918 0.44553 1.502 0.13310
ICD4mm 0.28054 0.11161 2.514 0.01195 *
SVDmax -0.20666 0.07406 -2.790 0.00527 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 136.75 on 98 degrees of freedom
Residual deviance: 125.84 on 95 degrees of freedom
AIC: 133.84
Number of Fisher Scoring iterations: 4
# Final binary (1/0) model for Ellipticitycalc, refitted explicitly with the
# three retained predictors
mod2_final <- glm(
  formula = Ellipticitycalc ~ Raphaecalcification + ICD4mm + SVDmax,
  family = "binomial",
  data = data1
)
# Keep the summary object so its coefficient table can be reused, then show it
summary_mod2 <- summary(mod2_final)
summary_mod2
Call:
glm(formula = Ellipticitycalc ~ Raphaecalcification + ICD4mm +
SVDmax, family = "binomial", data = data1)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6529 -1.0435 -0.6456 1.0645 1.7298
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.64810 2.02707 -0.320 0.74918
Raphaecalcification 0.66918 0.44553 1.502 0.13310
ICD4mm 0.28054 0.11161 2.514 0.01195 *
SVDmax -0.20666 0.07406 -2.790 0.00527 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 136.75 on 98 degrees of freedom
Residual deviance: 125.84 on 95 degrees of freedom
AIC: 133.84
Number of Fisher Scoring iterations: 4
Cálculo de los Odds Ratios
# Odds ratios: exponentiate the "Estimate" column of the coefficient table
odds_ratios <- exp(coef(summary_mod2)[, "Estimate"])
# Exponentiate the confidence limits so they sit on the odds-ratio scale
conf_intervals <- exp(confint(mod2_final))
Waiting for profiling to be done...
# p-values: the "Pr(>|z|)" column of the coefficient table
p_values <- coef(summary_mod2)[, "Pr(>|z|)"]
# One row per model term: odds ratio, interval bounds and p-value
consolidado <- data.frame(
  Odds_Ratio = odds_ratios,
  Lower_CI = conf_intervals[, 1],
  Upper_CI = conf_intervals[, 2],
  p_value = p_values
)
print(consolidado)
NA
# Pearson correlation test between ICD4mm_calc and MinSinusAnnDcalc
correlation_test6 <- cor.test(
  x = data1$ICD4mm_calc,
  y = data1$MinSinusAnnDcalc,
  method = "pearson"
)
print(correlation_test6)
Pearson's product-moment correlation
data: data1$ICD4mm_calc and data1$MinSinusAnnDcalc
t = 2.5412, df = 97, p-value = 0.01263
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.0551484 0.4262298
sample estimates:
cor
0.2498402
# Pearson correlation test between ICD4mm and SVDmin
correlation_test7 <- cor.test(
  x = data1$ICD4mm,
  y = data1$SVDmin,
  method = "pearson"
)
print(correlation_test7)
Pearson's product-moment correlation
data: data1$ICD4mm and data1$SVDmin
t = 6.6989, df = 97, p-value = 1.378e-09
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.4105785 0.6838884
sample estimates:
cor
0.5624056
# Pearson correlation test between ICD4mm and Ellipticity
correlation_test8 <- cor.test(
  x = data1$ICD4mm,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test8)
Pearson's product-moment correlation
data: data1$ICD4mm and data1$Ellipticity
t = 0.10506, df = 97, p-value = 0.9165
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1871391 0.2076414
sample estimates:
cor
0.01066681
# Pearson correlation test between SVDmin and Ellipticity
correlation_test9 <- cor.test(
  x = data1$SVDmin,
  y = data1$Ellipticity,
  method = "pearson"
)
print(correlation_test9)
Pearson's product-moment correlation
data: data1$SVDmin and data1$Ellipticity
t = -1.0149, df = 97, p-value = 0.3127
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.29396809 0.09686709
sample estimates:
cor
-0.1025049
En el análisis realizado, se están evaluando dos situaciones diferentes:
Correlación de Pearson: En este caso, se calculó el coeficiente de correlación de Pearson entre las variables “ICD4mm” y “Ellipticity”. El coeficiente de correlación obtenido fue de 0.011, lo cual indica una correlación extremadamente débil entre ambas variables. Además, el valor p asociado a la prueba de hipótesis es de 0.9165, lo cual indica que no hay evidencia suficiente para rechazar la hipótesis nula de que la correlación sea igual a cero. El intervalo de confianza del 95% para la correlación está entre -0.1871391 y 0.2076414.
Regresión logística: En este caso, se ajustó un modelo de regresión logística utilizando la variable binaria “Ellipticitycalc” como variable de respuesta y otras variables independientes. En el modelo final, la variable “ICD4mm” resultó significativa (p-value = 0.01195), lo cual indica que tiene un efecto estadísticamente significativo en la variable de respuesta. Sin embargo, es importante tener en cuenta que la significancia en un modelo de regresión no implica necesariamente una fuerte correlación entre las variables.
La diferencia entre los resultados puede deberse a que el coeficiente de correlación de Pearson evalúa la relación lineal entre dos variables de forma independiente de otras variables, mientras que la regresión logística considera el efecto conjunto de todas las variables independientes en la variable de respuesta. Además, los dos análisis no utilizan la misma variable de respuesta: la correlación se calculó con “Ellipticity” (continua) y la regresión con “Ellipticitycalc” (binaria). En otras palabras, la significancia de “ICD4mm” en el modelo de regresión puede deberse a su relación con las otras variables incluidas en el modelo, y no necesariamente a su correlación directa con “Ellipticity”.
Es importante destacar que el análisis de los resultados debe considerar el contexto y los objetivos del estudio, así como tener en cuenta otras consideraciones estadísticas y de interpretación.