library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library("ggplot2")
library(psych)
## 
## Adjuntando el paquete: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(tableone)
library(table1)
## 
## Adjuntando el paquete: 'table1'
## 
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(dplyr)
options(scipen = 999, digits = 3, encoding = 'UTF-8')
library(descr)
library(expss)
## Cargando paquete requerido: maditr
## 
## Use magrittr pipe '%>%' to chain several operations:
##              mtcars %>%
##                  let(mpg_hp = mpg/hp) %>%
##                  take(mean(mpg_hp), by = am)
##         
## 
## 
## Adjuntando el paquete: 'maditr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, coalesce, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## The following object is masked from 'package:readr':
## 
##     cols
## 
## 
## Adjuntando el paquete: 'expss'
## 
## The following objects are masked from 'package:stringr':
## 
##     fixed, regex
## 
## The following objects are masked from 'package:dplyr':
## 
##     compute, contains, na_if, recode, vars, where
## 
## The following objects are masked from 'package:purrr':
## 
##     keep, modify, modify_if, when
## 
## The following objects are masked from 'package:tidyr':
## 
##     contains, nest
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars
library(DescTools)
## 
## Adjuntando el paquete: 'DescTools'
## 
## The following object is masked from 'package:maditr':
## 
##     %like%
## 
## The following objects are masked from 'package:psych':
## 
##     AUC, ICC, SD
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(summarytools)
## 
## Adjuntando el paquete: 'summarytools'
## 
## The following objects are masked from 'package:descr':
## 
##     descr, freq
## 
## The following objects are masked from 'package:table1':
## 
##     label, label<-
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(data.table)
## 
## Adjuntando el paquete: 'data.table'
## 
## The following object is masked from 'package:DescTools':
## 
##     %like%
## 
## The following objects are masked from 'package:expss':
## 
##     copy, like
## 
## The following objects are masked from 'package:maditr':
## 
##     copy, dcast, let, melt
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(epiR)
## Cargando paquete requerido: survival
## Package epiR 2.0.77 is loaded
## Type help(epi.about) for summary information
## Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library(survival)
library(survminer)
## Cargando paquete requerido: ggpubr
## 
## Adjuntando el paquete: 'ggpubr'
## 
## The following object is masked from 'package:expss':
## 
##     compare_means
## 
## 
## Adjuntando el paquete: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
library(pwr)
library(ggpubr)
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## 
## The following objects are masked from 'package:data.table':
## 
##     yearmon, yearqtr
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library("openxlsx")
## Warning: package 'openxlsx' was built under R version 4.4.3
library("car")
## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## 
## The following object is masked from 'package:DescTools':
## 
##     Recode
## 
## The following object is masked from 'package:expss':
## 
##     recode
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library("lmtest")
library("MASS")
## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the data set from the first sheet of the workbook.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
dm <- read_excel("C:/Users/Administrador/Downloads/dm.xlsx", sheet = 1)
# Keep a copy of the data exactly as imported, before any recoding.
dm1 <- dm
# Convert BMI category (imc_cat), sex (sexo) and low education (bajaeduc)
# to factors.
dm <- dm %>% mutate(across(c(imc_cat, sexo, bajaeduc), as.factor))
# Inspect the resulting categories of imc_cat.
levels(dm$imc_cat)
## [1] "1" "2" "3"
# Simple linear regression of blood glucose (gluc) on body mass index measured
# on a continuous scale (imc).
modelo1 <- lm(formula = gluc ~ imc, data = dm)
# Confidence intervals for the intercept and the slope.
confint(object = modelo1)
##              2.5 % 97.5 %
## (Intercept) 77.612 89.424
## imc          0.496  0.903
tab_model(modelo1)
  gluc
Predictors Estimates CI p
(Intercept) 83.52 77.61 – 89.42 <0.001
imc 0.70 0.50 – 0.90 <0.001
Observations 247
R2 / R2 adjusted 0.157 / 0.154
summary(modelo1)
## 
## Call:
## lm(formula = gluc ~ imc, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.501  -6.351   0.302   6.100  27.002 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   83.518      2.998   27.85 < 0.0000000000000002 ***
## imc            0.699      0.103    6.76       0.000000000099 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.91 on 245 degrees of freedom
## Multiple R-squared:  0.157,  Adjusted R-squared:  0.154 
## F-statistic: 45.7 on 1 and 245 DF,  p-value: 0.0000000000995
#Ecuación estimada: Glucemia = 83.52 + 0.70 × IMC
# Multiple linear regression: association between glucose (gluc) and body mass
# index (imc), adjusted for sex and age.
# sexo1 re-declares sexo as a factor with the explicit level order "0", "1",
# which makes "0" the reference category in the model.
dm$sexo1 <- factor(dm$sexo, levels = c("0", "1"))

modelo2 <- lm(formula = gluc ~ imc + edad + sexo1, data = dm)
confint(object = modelo2)
##              2.5 % 97.5 %
## (Intercept) 72.243 87.623
## imc          0.398  0.802
## edad         0.047  0.245
## sexo11      -5.433 -1.009
tab_model(modelo2)
  gluc
Predictors Estimates CI p
(Intercept) 79.93 72.24 – 87.62 <0.001
imc 0.60 0.40 – 0.80 <0.001
edad 0.15 0.05 – 0.24 0.004
sexo1 [1] -3.22 -5.43 – -1.01 0.004
Observations 247
R2 / R2 adjusted 0.220 / 0.211
summary(modelo2)
## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.131  -5.779   0.886   6.233  26.335 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  79.9332     3.9040   20.47 < 0.0000000000000002 ***
## imc           0.6000     0.1024    5.86          0.000000015 ***
## edad          0.1459     0.0502    2.91               0.0040 ** 
## sexo11       -3.2209     1.1228   -2.87               0.0045 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.61 on 243 degrees of freedom
## Multiple R-squared:  0.22,   Adjusted R-squared:  0.211 
## F-statistic: 22.9 on 3 and 243 DF,  p-value: 0.000000000000438
# Does "low educational level" (bajaeduc) confound the association between
# glucose and body mass index? Justify the answer.
# Fit the imc model adjusted for bajaeduc so that its imc coefficient can be
# compared with the unadjusted one from modelo1.
modelo3 <- lm(formula = gluc ~ imc + bajaeduc, data = dm)
confint(object = modelo3)
##              2.5 % 97.5 %
## (Intercept) 77.609 89.414
## imc          0.468  0.883
## bajaeduc1   -0.965  3.595
tab_model(modelo3)
  gluc
Predictors Estimates CI p
(Intercept) 83.51 77.61 – 89.41 <0.001
imc 0.68 0.47 – 0.88 <0.001
bajaeduc [1] 1.32 -0.96 – 3.59 0.257
Observations 247
R2 / R2 adjusted 0.162 / 0.155
summary(modelo3)
## 
## Call:
## lm(formula = gluc ~ imc + bajaeduc, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.783  -6.053   0.298   6.185  26.523 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   83.512      2.997   27.87 < 0.0000000000000002 ***
## imc            0.676      0.105    6.41        0.00000000076 ***
## bajaeduc1      1.315      1.157    1.14                 0.26    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.91 on 244 degrees of freedom
## Multiple R-squared:  0.162,  Adjusted R-squared:  0.155 
## F-statistic: 23.5 on 2 and 244 DF,  p-value: 0.000000000453
#bajaeduc no es confundidor: al ajustar por bajaeduc el coeficiente de imc pasa de 0.699 a 0.676 (cambio ≈ 3 %, menor al 10 %)
# Effect modification: the association between body mass index (imc) and
# glucose might differ according to age. Create a dichotomous age variable
# with the cut-off at more than 50 years ("viejo") versus 50 or less
# ("joven"), and use it to test the hypothesis.
dm <- dm %>%
  mutate(edad_cat = factor(if_else(edad > 50, "viejo", "joven")))
# Number of observations in each age group.
summary(dm$edad_cat)
## joven viejo 
##    87   160
# Interaction model: imc, the age group and their product term. The
# imc:edad_cat coefficient is the difference in the imc slope between groups.
modelo4 <- lm(formula = gluc ~ imc + edad_cat + imc:edad_cat, data = dm)
confint(object = modelo4)
##                    2.5 % 97.5 %
## (Intercept)       70.574 91.329
## imc                0.338  1.079
## edad_catviejo     -7.073 18.095
## imc:edad_catviejo -0.510  0.375
tab_model(modelo4)
  gluc
Predictors Estimates CI p
(Intercept) 80.95 70.57 – 91.33 <0.001
imc 0.71 0.34 – 1.08 <0.001
edad cat [viejo] 5.51 -7.07 – 18.10 0.389
imc × edad cat [viejo] -0.07 -0.51 – 0.37 0.764
Observations 247
R2 / R2 adjusted 0.189 / 0.179
summary(modelo4)
## 
## Call:
## lm(formula = gluc ~ imc + edad_cat + imc:edad_cat, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.209  -5.839   0.459   6.127  26.096 
## 
## Coefficients:
##                   Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)        80.9512     5.2685   15.37 < 0.0000000000000002 ***
## imc                 0.7086     0.1881    3.77              0.00021 ***
## edad_catviejo       5.5112     6.3885    0.86              0.38917    
## imc:edad_catviejo  -0.0674     0.2244   -0.30              0.76415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.78 on 243 degrees of freedom
## Multiple R-squared:  0.189,  Adjusted R-squared:  0.179 
## F-statistic: 18.9 on 3 and 243 DF,  p-value: 0.0000000000477
# Stratified analysis: fit the simple model separately for people older than
# 50 ("viejo") and for those aged 50 or younger ("joven").

base_men50 <- dm %>% filter(edad_cat == "joven")
basemay50 <- dm %>% filter(edad_cat == "viejo")
# Model restricted to the older-than-50 stratum.
modelo6viejo <- lm(formula = gluc ~ imc, data = basemay50)
confint(object = modelo6viejo)
##              2.5 % 97.5 %
## (Intercept) 79.305 93.620
## imc          0.399  0.884
tab_model(modelo6viejo)
  gluc
Predictors Estimates CI p
(Intercept) 86.46 79.31 – 93.62 <0.001
imc 0.64 0.40 – 0.88 <0.001
Observations 160
R2 / R2 adjusted 0.147 / 0.142
summary(modelo6viejo)
## 
## Call:
## lm(formula = gluc ~ imc, data = basemay50)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.416  -5.962   0.046   6.527  26.096 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   86.462      3.624   23.86 < 0.0000000000000002 ***
## imc            0.641      0.123    5.22           0.00000056 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.81 on 158 degrees of freedom
## Multiple R-squared:  0.147,  Adjusted R-squared:  0.142 
## F-statistic: 27.2 on 1 and 158 DF,  p-value: 0.000000557
# Same simple model restricted to the stratum aged 50 or younger.
modelo6joven <- lm(formula = gluc ~ imc, data = base_men50)
confint(object = modelo6joven)
##              2.5 % 97.5 %
## (Intercept) 70.533  91.37
## imc          0.337   1.08
tab_model(modelo6joven)
  gluc
Predictors Estimates CI p
(Intercept) 80.95 70.53 – 91.37 <0.001
imc 0.71 0.34 – 1.08 <0.001
Observations 87
R2 / R2 adjusted 0.144 / 0.134
summary(modelo6joven)
## 
## Call:
## lm(formula = gluc ~ imc, data = base_men50)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28.21  -5.36   1.58   6.06  22.33 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   80.951      5.240   15.45 < 0.0000000000000002 ***
## imc            0.709      0.187    3.79              0.00028 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.73 on 85 degrees of freedom
## Multiple R-squared:  0.144,  Adjusted R-squared:  0.134 
## F-statistic: 14.4 on 1 and 85 DF,  p-value: 0.000282
# Adjusted model (imc, continuous age, sex) plus the imc-by-age-group product.
# NOTE(review): the interaction imc:edad_cat is entered without the edad_cat
# main effect; confirm this is intended before interpreting the term.
modelo5 <- lm(formula = gluc ~ imc + edad + sexo1 + imc:edad_cat, data = dm)
confint(object = modelo5)
##                     2.5 % 97.5 %
## (Intercept)       70.3915 91.669
## imc                0.3674  0.807
## edad              -0.0433  0.294
## sexo11            -5.4203 -0.980
## imc:edad_catviejo -0.1176  0.159
tab_model(modelo5)
  gluc
Predictors Estimates CI p
(Intercept) 81.03 70.39 – 91.67 <0.001
imc 0.59 0.37 – 0.81 <0.001
edad 0.13 -0.04 – 0.29 0.144
sexo1 [1] -3.20 -5.42 – -0.98 0.005
imc × edad catviejo 0.02 -0.12 – 0.16 0.769
Observations 247
R2 / R2 adjusted 0.221 / 0.208
summary(modelo5)
## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1 + imc:edad_cat, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.963  -5.771   0.993   6.205  26.049 
## 
## Coefficients:
##                   Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)        81.0304     5.4009   15.00 < 0.0000000000000002 ***
## imc                 0.5871     0.1115    5.26           0.00000031 ***
## edad                0.1254     0.0857    1.46               0.1444    
## sexo11             -3.2002     1.1271   -2.84               0.0049 ** 
## imc:edad_catviejo   0.0207     0.0702    0.29               0.7685    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.63 on 242 degrees of freedom
## Multiple R-squared:  0.221,  Adjusted R-squared:  0.208 
## F-statistic: 17.1 on 4 and 242 DF,  p-value: 0.00000000000221
# Model of glucose as a function of imc, age and sex (same specification as
# modelo2); this is the model used for the diagnostics that follow.
modelo6 <- lm(formula = gluc ~ imc + edad + sexo1, data = dm)
confint(object = modelo6)
##              2.5 % 97.5 %
## (Intercept) 72.243 87.623
## imc          0.398  0.802
## edad         0.047  0.245
## sexo11      -5.433 -1.009
tab_model(modelo6)
  gluc
Predictors Estimates CI p
(Intercept) 79.93 72.24 – 87.62 <0.001
imc 0.60 0.40 – 0.80 <0.001
edad 0.15 0.05 – 0.24 0.004
sexo1 [1] -3.22 -5.43 – -1.01 0.004
Observations 247
R2 / R2 adjusted 0.220 / 0.211
summary(modelo6)
## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.131  -5.779   0.886   6.233  26.335 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  79.9332     3.9040   20.47 < 0.0000000000000002 ***
## imc           0.6000     0.1024    5.86          0.000000015 ***
## edad          0.1459     0.0502    2.91               0.0040 ** 
## sexo11       -3.2209     1.1228   -2.87               0.0045 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.61 on 243 degrees of freedom
## Multiple R-squared:  0.22,   Adjusted R-squared:  0.211 
## F-statistic: 22.9 on 3 and 243 DF,  p-value: 0.000000000000438
# Assumption checks for modelo6: linearity first, then normality of residuals.
# Fitted (predicted) values.

predichos <- fitted.values(modelo6)

# Raw residuals.
# NOTE(review): res is not used again in the visible part of the script.

res <- residuals(modelo6)
# Standardized residuals.

standres <-rstandard(modelo6)

# Linearity check: residuals against fitted values, with a reference line at 0.

plot(y=modelo6$residuals,x=modelo6$fitted.values)
abline(h=0)

# Added-variable plots, one per predictor of modelo6.
avPlots(modelo6)

# Normality check of the residuals: density, histogram, boxplot and Q-Q plot.
densityPlot(modelo6$residuals)

hist(modelo6$residuals)

boxplot(modelo6$residuals)

qqPlot(standres) # the Q-Q plot uses the standardized residuals

## [1] 109  19
describe(modelo6$residuals)
##    vars   n mean   sd median trimmed  mad   min  max range  skew kurtosis   se
## X1    1 247    0 8.56   0.89    0.19 8.85 -27.1 26.3  53.5 -0.22     0.18 0.54
shapiro.test(modelo6$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  modelo6$residuals
## W = 1, p-value = 0.2
#test de Breusch Pagan de homocedasticidad
bptest(modelo6)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo6
## BP = 1, df = 3, p-value = 0.7
#Independencia: test de Durbin-Watson

dwtest(modelo6)
## 
##  Durbin-Watson test
## 
## data:  modelo6
## DW = 2, p-value = 0.6
## alternative hypothesis: true autocorrelation is greater than 0
#multicolinealidad
vif(modelo6) 
##   imc  edad sexo1 
##  1.05  1.06  1.05
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Adjuntando el paquete: 'GGally'
## The following object is masked from 'package:emmeans':
## 
##     pigs
# Pairwise scatterplots and correlations for the outcome and the predictors.
columnas_matriz <- c("gluc", "sexo", "edad", "imc")
vars <- dm[columnas_matriz]
# sexo must be a factor so it is drawn as a categorical variable.
vars[["sexo"]] <- as.factor(vars[["sexo"]])
ggpairs(data = vars, title = "Matriz de correlaciones y dispersión")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Outlier screening: standardized residuals against fitted values, with
# reference lines at -3 and 3.
predichos <- modelo6$fitted.values
standres <- rstandard(model = modelo6)
plot(x = predichos, y = standres)
abline(h = c(-3, 3))

# Positions of the observations whose standardized residual is beyond +/-3.
which(standres > 3 | standres < -3)
##  19 109 223 
##  19 109 223
standres[standres >3]
##  223 
## 3.08
standres[standres < -3]
##    19   109 
## -3.17 -3.17
# Influential observations.
# Hat values (leverage) of modelo6, stored here as a one-column data frame.
leverage <- as.data.frame(hatvalues(modelo6))
# Cook's distance of every observation.
cooksd <- cooks.distance(modelo6)
# Show the distances above the 4 / n rule of thumb. n is taken from the data
# instead of being hard-coded, so the cut-off follows the fitted sample.
cooksd[cooksd > 4 / length(cooksd)]
##     14     19     46     64    109    129    183    196    202    215    223 
## 0.0206 0.0736 0.0185 0.0326 0.0309 0.0235 0.0190 0.0426 0.0169 0.0190 0.0335 
##    245 
## 0.0210
# Cook's distance and leverage (hat values) for every observation of modelo6.
cooksd <- cooks.distance(modelo6)
leverage <- hatvalues(modelo6)

# Usual 4 / n cut-off for Cook's distance.
umbral_cook <- 4 / length(cooksd)

# Positions of the observations above the cut-off.
influyentes <- which(cooksd > umbral_cook)

# Summary table: row position, leverage and Cook's distance of each
# influential observation.
tabla_influyentes <- data.frame(
  fila           = influyentes,
  leverage       = leverage[influyentes],
  distancia_cook = cooksd[influyentes]
)

# Display the table.
print(tabla_influyentes)
##     fila leverage distancia_cook
## 14    14   0.0279         0.0206
## 19    19   0.0285         0.0736
## 46    46   0.0142         0.0185
## 64    64   0.0267         0.0326
## 109  109   0.0122         0.0309
## 129  129   0.0192         0.0235
## 183  183   0.0157         0.0190
## 196  196   0.0256         0.0426
## 202  202   0.0186         0.0169
## 215  215   0.0199         0.0190
## 223  223   0.0139         0.0335
## 245  245   0.0197         0.0210
# Leverage (hat values) of each observation in modelo6.
leverage <- hatvalues(modelo6)

# Cook's distance of each observation.
cooks <- cooks.distance(modelo6)

# Outliers: standardized residual above 3 or below -3.
outlier_indices <- which(abs(standres) > 3)

# Summary table with the relevant information for each outlier.
# The observed response is looked up by column name, read as a string from
# the model formula, rather than by indexing with an unevaluated symbol taken
# from the stored call. Because dm is subset to a one-column table, that
# column keeps the response name in the printed result.
outliers_tabla <- data.frame(
  fila = outlier_indices,
  valor_real = dm[outlier_indices, all.vars(formula(modelo6))[1]],
  predicho = predichos[outlier_indices],
  residuo_estandarizado = standres[outlier_indices],
  leverage = leverage[outlier_indices],
  distancia_cook = cooks[outlier_indices]
)

# Display the table.
print(outliers_tabla)
##     fila gluc predicho residuo_estandarizado leverage distancia_cook
## 19    19   78      105                 -3.17   0.0285         0.0736
## 109  109   74      101                 -3.17   0.0122         0.0309
## 223  223  135      109                  3.08   0.0139         0.0335
# Sensitivity analysis: refit the model leaving out observation 19 and,
# separately, observation 196.
# Data set without row 19.
SIN_19 <- dm[-19, ]

# Full-sample intervals, shown again as the reference for the comparison.
confint(object = modelo6)
##              2.5 % 97.5 %
## (Intercept) 72.243 87.623
## imc          0.398  0.802
## edad         0.047  0.245
## sexo11      -5.433 -1.009
tab_model(modelo6)
  gluc
Predictors Estimates CI p
(Intercept) 79.93 72.24 – 87.62 <0.001
imc 0.60 0.40 – 0.80 <0.001
edad 0.15 0.05 – 0.24 0.004
sexo1 [1] -3.22 -5.43 – -1.01 0.004
Observations 247
R2 / R2 adjusted 0.220 / 0.211
summary(modelo6)
## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.131  -5.779   0.886   6.233  26.335 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  79.9332     3.9040   20.47 < 0.0000000000000002 ***
## imc           0.6000     0.1024    5.86          0.000000015 ***
## edad          0.1459     0.0502    2.91               0.0040 ** 
## sexo11       -3.2209     1.1228   -2.87               0.0045 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.61 on 243 degrees of freedom
## Multiple R-squared:  0.22,   Adjusted R-squared:  0.211 
## F-statistic: 22.9 on 3 and 243 DF,  p-value: 0.000000000000438
# Same specification as modelo6, fitted without observation 19.
modelo9 <- lm(formula = gluc ~ imc + edad + sexo1, data = SIN_19)
confint(object = modelo9)
##              2.5 % 97.5 %
## (Intercept) 71.202 86.357
## imc          0.396  0.792
## edad         0.071  0.267
## sexo11      -5.098 -0.743
tab_model(modelo9)
  gluc
Predictors Estimates CI p
(Intercept) 78.78 71.20 – 86.36 <0.001
imc 0.59 0.40 – 0.79 <0.001
edad 0.17 0.07 – 0.27 0.001
sexo1 [1] -2.92 -5.10 – -0.74 0.009
Observations 246
R2 / R2 adjusted 0.231 / 0.221
summary(modelo9)
## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = SIN_19)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.122  -5.740   0.782   6.112  26.466 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  78.7795     3.8470   20.48 < 0.0000000000000002 ***
## imc           0.5942     0.1005    5.91          0.000000011 ***
## edad          0.1690     0.0498    3.40               0.0008 ***
## sexo11       -2.9202     1.1055   -2.64               0.0088 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.45 on 242 degrees of freedom
## Multiple R-squared:  0.231,  Adjusted R-squared:  0.221 
## F-statistic: 24.2 on 3 and 242 DF,  p-value: 0.0000000000000964
# Data set without row 196, and the same specification as modelo6 fitted on it.
SIN_196 <- dm[-196, ]
modelo10 <- lm(formula = gluc ~ imc + edad + sexo1, data = SIN_196)
confint(object = modelo10)
##               2.5 % 97.5 %
## (Intercept) 72.5552 87.764
## imc          0.3730  0.774
## edad         0.0605  0.257
## sexo11      -5.5906 -1.209
tab_model(modelo10)
  gluc
Predictors Estimates CI p
(Intercept) 80.16 72.56 – 87.76 <0.001
imc 0.57 0.37 – 0.77 <0.001
edad 0.16 0.06 – 0.26 0.002
sexo1 [1] -3.40 -5.59 – -1.21 0.002
Observations 246
R2 / R2 adjusted 0.226 / 0.216
summary(modelo10)
## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = SIN_196)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.198  -5.766   0.952   5.985  26.357 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  80.1597     3.8605   20.76 < 0.0000000000000002 ***
## imc           0.5734     0.1018    5.63          0.000000049 ***
## edad          0.1588     0.0499    3.18               0.0016 ** 
## sexo11       -3.3998     1.1122   -3.06               0.0025 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.51 on 242 degrees of freedom
## Multiple R-squared:  0.226,  Adjusted R-squared:  0.216 
## F-statistic: 23.5 on 3 and 242 DF,  p-value: 0.000000000000218
# Comparison tables of the coefficients with and without observations 19 and
# 196. This first table covers observation 19.
# Coefficients of the full-sample model and of the model without row 19.
coef_sin <- coefficients(modelo9)
coef_ori <- coefficients(modelo6)

# One row per coefficient: both estimates and the percentage change relative
# to the full-sample estimate.
comparacion <- data.frame(
  Coeficiente = names(coef_ori),
  Original    = coef_ori,
  Sin_19      = coef_sin,
  Cambio_pct  = 100 * (coef_sin - coef_ori) / coef_ori
)

# Display the table.
print(comparacion)
##             Coeficiente Original Sin_19 Cambio_pct
## (Intercept) (Intercept)   79.933 78.780     -1.443
## imc                 imc    0.600  0.594     -0.962
## edad               edad    0.146  0.169     15.842
## sexo11           sexo11   -3.221 -2.920     -9.335
# Coefficients of the full-sample model and of the model without row 196.
coef_ori1 <- coef(modelo6)
coef_sin1 <- coef(modelo10)

# One row per coefficient: both estimates and the percentage change relative
# to the full-sample estimate. The labels come from this block's own
# coef_ori1, so the table does not depend on objects from the previous
# comparison.
comparacion1 <- data.frame(
  Coeficiente = names(coef_ori1),
  Original = coef_ori1,
  Sin_196 = coef_sin1,
  Cambio_pct = 100 * (coef_sin1 - coef_ori1) / coef_ori1
)

# Display the table.
print(comparacion1)
##             Coeficiente Original Sin_196 Cambio_pct
## (Intercept) (Intercept)   79.933  80.160      0.283
## imc                 imc    0.600   0.573     -4.430
## edad               edad    0.146   0.159      8.825
## sexo11           sexo11   -3.221  -3.400      5.556