Ejercicio2

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)
library("ggplot2")
library(psych)

## 
## Adjuntando el paquete: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(tableone)
library(table1)

## 
## Adjuntando el paquete: 'table1'
## 
## The following objects are masked from 'package:base':
## 
##     units, units<-

library(dplyr)
options(scipen = 999, digits = 3, encoding = 'UTF-8')
library(descr)
library(expss)

## Cargando paquete requerido: maditr
## 
## Use magrittr pipe '%>%' to chain several operations:
##              mtcars %>%
##                  let(mpg_hp = mpg/hp) %>%
##                  take(mean(mpg_hp), by = am)
##         
## 
## 
## Adjuntando el paquete: 'maditr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, coalesce, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## The following object is masked from 'package:readr':
## 
##     cols
## 
## 
## Adjuntando el paquete: 'expss'
## 
## The following objects are masked from 'package:stringr':
## 
##     fixed, regex
## 
## The following objects are masked from 'package:dplyr':
## 
##     compute, contains, na_if, recode, vars, where
## 
## The following objects are masked from 'package:purrr':
## 
##     keep, modify, modify_if, when
## 
## The following objects are masked from 'package:tidyr':
## 
##     contains, nest
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars

library(DescTools)

## 
## Adjuntando el paquete: 'DescTools'
## 
## The following object is masked from 'package:maditr':
## 
##     %like%
## 
## The following objects are masked from 'package:psych':
## 
##     AUC, ICC, SD

library(emmeans)

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

library(summarytools)

## 
## Adjuntando el paquete: 'summarytools'
## 
## The following objects are masked from 'package:descr':
## 
##     descr, freq
## 
## The following objects are masked from 'package:table1':
## 
##     label, label<-
## 
## The following object is masked from 'package:tibble':
## 
##     view

library(data.table)

## 
## Adjuntando el paquete: 'data.table'
## 
## The following object is masked from 'package:DescTools':
## 
##     %like%
## 
## The following objects are masked from 'package:expss':
## 
##     copy, like
## 
## The following objects are masked from 'package:maditr':
## 
##     copy, dcast, let, melt
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose

library(epiR)

## Cargando paquete requerido: survival
## Package epiR 2.0.77 is loaded
## Type help(epi.about) for summary information
## Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses

library(sjPlot)

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

library(survival)
library(survminer)

## Cargando paquete requerido: ggpubr
## 
## Adjuntando el paquete: 'ggpubr'
## 
## The following object is masked from 'package:expss':
## 
##     compare_means
## 
## 
## Adjuntando el paquete: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma

library(pwr)
library(ggpubr)
library(lmtest)

## Warning: package 'lmtest' was built under R version 4.4.3

## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## 
## The following objects are masked from 'package:data.table':
## 
##     yearmon, yearqtr
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library("openxlsx")

## Warning: package 'openxlsx' was built under R version 4.4.3

library("car")

## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## 
## The following object is masked from 'package:DescTools':
## 
##     Recode
## 
## The following object is masked from 'package:expss':
## 
##     recode
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library("lmtest")
library("MASS")

## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select

# Read the first sheet of the workbook into `dm`.
dm <- read_excel(path = "C:/Users/Administrador/Downloads/dm.xlsx", sheet = 1)

# Keep a copy of the data as read (before any recoding) in `dm1`, then convert
# imc_cat, sexo and bajaeduc to factors and list the levels of imc_cat.
dm1 <- dm
dm <- dm %>%
  mutate(across(c(imc_cat, sexo, bajaeduc), as.factor))
levels(dm$imc_cat)

## [1] "1" "2" "3"

# Simple linear regression of glucose (gluc) on body mass index (imc, kept
# continuous), followed by the confidence intervals of its coefficients.
modelo1 <- lm(formula = gluc ~ imc, data = dm)
confint(modelo1)

##              2.5 % 97.5 %
## (Intercept) 77.612 89.424
## imc          0.496  0.903

tab_model(modelo1)

	gluc
Predictors	Estimates	CI	p
(Intercept)	83.52	77.61 – 89.42	<0.001
imc	0.70	0.50 – 0.90	<0.001
Observations	247
R² / R² adjusted	0.157 / 0.154

summary(modelo1)

## 
## Call:
## lm(formula = gluc ~ imc, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.501  -6.351   0.302   6.100  27.002 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   83.518      2.998   27.85 < 0.0000000000000002 ***
## imc            0.699      0.103    6.76       0.000000000099 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.91 on 245 degrees of freedom
## Multiple R-squared:  0.157,  Adjusted R-squared:  0.154 
## F-statistic: 45.7 on 1 and 245 DF,  p-value: 0.0000000000995

#Ecuación estimada: Glucemia = 83.52 + 0.70 × IMC

# Multiple linear regression: glucose (gluc) on body mass index (imc),
# adjusted for sex and age.
# sexo1 re-declares sexo as a factor with levels "0" and "1", so "0" is the
# reference category in the model.
dm <- dm %>%
  mutate(sexo1 = factor(sexo, levels = c("0", "1")))

modelo2 <- lm(formula = gluc ~ imc + edad + sexo1, data = dm)
confint(modelo2)

##              2.5 % 97.5 %
## (Intercept) 72.243 87.623
## imc          0.398  0.802
## edad         0.047  0.245
## sexo11      -5.433 -1.009

tab_model(modelo2)

	gluc
Predictors	Estimates	CI	p
(Intercept)	79.93	72.24 – 87.62	<0.001
imc	0.60	0.40 – 0.80	<0.001
edad	0.15	0.05 – 0.24	0.004
sexo1 [1]	-3.22	-5.43 – -1.01	0.004
Observations	247
R² / R² adjusted	0.220 / 0.211

summary(modelo2)

## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.131  -5.779   0.886   6.233  26.335 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  79.9332     3.9040   20.47 < 0.0000000000000002 ***
## imc           0.6000     0.1024    5.86          0.000000015 ***
## edad          0.1459     0.0502    2.91               0.0040 ** 
## sexo11       -3.2209     1.1228   -2.87               0.0045 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.61 on 243 degrees of freedom
## Multiple R-squared:  0.22,   Adjusted R-squared:  0.211 
## F-statistic: 22.9 on 3 and 243 DF,  p-value: 0.000000000000438

# Is "low educational level" (bajaeduc) a confounder of the association
# between glucose and body mass index? Justify the answer.
# NOTE(review): confounding is presumably judged by comparing the imc
# coefficient of this adjusted model with the crude one from modelo1.
modelo3 <-lm(gluc ~ imc + bajaeduc, data = dm)
confint(modelo3)

##              2.5 % 97.5 %
## (Intercept) 77.609 89.414
## imc          0.468  0.883
## bajaeduc1   -0.965  3.595

tab_model(modelo3)

	gluc
Predictors	Estimates	CI	p
(Intercept)	83.51	77.61 – 89.41	<0.001
imc	0.68	0.47 – 0.88	<0.001
bajaeduc [1]	1.32	-0.96 – 3.59	0.257
Observations	247
R² / R² adjusted	0.162 / 0.155

summary(modelo3)

## 
## Call:
## lm(formula = gluc ~ imc + bajaeduc, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.783  -6.053   0.298   6.185  26.523 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   83.512      2.997   27.87 < 0.0000000000000002 ***
## imc            0.676      0.105    6.41        0.00000000076 ***
## bajaeduc1      1.315      1.157    1.14                 0.26    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.91 on 244 degrees of freedom
## Multiple R-squared:  0.162,  Adjusted R-squared:  0.155 
## F-statistic: 23.5 on 2 and 244 DF,  p-value: 0.000000000453

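#bajaeduc no es confundidor: el coeficiente de imc cambia de 0.699 (modelo crudo) a 0.676 (ajustado por bajaeduc), una variación de aprox. 3.3 %, menor al 10 % habitual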

# Hypothesis: the association between body mass index (imc) and glucose may
# differ according to age. Build a dichotomous age variable with the cut-off
# at more than 50 years and use it to test the hypothesis
# (effect modification).
dm <- dm %>%
  mutate(edad_cat = as.factor(dplyr::if_else(edad > 50, "viejo", "joven")))
summary(dm$edad_cat)

## joven viejo 
##    87   160

# Effect-modification model: imc, edad_cat and their interaction. The
# imc:edad_cat coefficient is the difference in the imc slope between the
# two age groups, so its test is the test of effect modification.
modelo4 <-lm(gluc ~ imc+ edad_cat + imc:edad_cat, data = dm)
confint(modelo4)

##                    2.5 % 97.5 %
## (Intercept)       70.574 91.329
## imc                0.338  1.079
## edad_catviejo     -7.073 18.095
## imc:edad_catviejo -0.510  0.375

tab_model(modelo4)

	gluc
Predictors	Estimates	CI	p
(Intercept)	80.95	70.57 – 91.33	<0.001
imc	0.71	0.34 – 1.08	<0.001
edad cat [viejo]	5.51	-7.07 – 18.10	0.389
imc × edad cat [viejo]	-0.07	-0.51 – 0.37	0.764
Observations	247
R² / R² adjusted	0.189 / 0.179

summary(modelo4)

## 
## Call:
## lm(formula = gluc ~ imc + edad_cat + imc:edad_cat, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.209  -5.839   0.459   6.127  26.096 
## 
## Coefficients:
##                   Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)        80.9512     5.2685   15.37 < 0.0000000000000002 ***
## imc                 0.7086     0.1881    3.77              0.00021 ***
## edad_catviejo       5.5112     6.3885    0.86              0.38917    
## imc:edad_catviejo  -0.0674     0.2244   -0.30              0.76415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.78 on 243 degrees of freedom
## Multiple R-squared:  0.189,  Adjusted R-squared:  0.179 
## F-statistic: 18.9 on 3 and 243 DF,  p-value: 0.0000000000477

# Stratified analysis: fit gluc ~ imc separately in people older than 50
# ("viejo") and in people aged 50 or younger ("joven").

basemay50 <- dm %>% dplyr::filter(edad_cat == "viejo")
base_men50 <- dm %>% dplyr::filter(edad_cat == "joven")
modelo6viejo <- lm(formula = gluc ~ imc, data = basemay50)
confint(modelo6viejo)

##              2.5 % 97.5 %
## (Intercept) 79.305 93.620
## imc          0.399  0.884

tab_model(modelo6viejo)

	gluc
Predictors	Estimates	CI	p
(Intercept)	86.46	79.31 – 93.62	<0.001
imc	0.64	0.40 – 0.88	<0.001
Observations	160
R² / R² adjusted	0.147 / 0.142

summary(modelo6viejo)

## 
## Call:
## lm(formula = gluc ~ imc, data = basemay50)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.416  -5.962   0.046   6.527  26.096 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   86.462      3.624   23.86 < 0.0000000000000002 ***
## imc            0.641      0.123    5.22           0.00000056 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.81 on 158 degrees of freedom
## Multiple R-squared:  0.147,  Adjusted R-squared:  0.142 
## F-statistic: 27.2 on 1 and 158 DF,  p-value: 0.000000557

# Same simple model, fitted on the "joven" stratum (base_men50).
modelo6joven <- lm(formula = gluc ~ imc, data = base_men50)
confint(modelo6joven)

##              2.5 % 97.5 %
## (Intercept) 70.533  91.37
## imc          0.337   1.08

tab_model(modelo6joven)

	gluc
Predictors	Estimates	CI	p
(Intercept)	80.95	70.53 – 91.37	<0.001
imc	0.71	0.34 – 1.08	<0.001
Observations	87
R² / R² adjusted	0.144 / 0.134

summary(modelo6joven)

## 
## Call:
## lm(formula = gluc ~ imc, data = base_men50)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28.21  -5.36   1.58   6.06  22.33 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   80.951      5.240   15.45 < 0.0000000000000002 ***
## imc            0.709      0.187    3.79              0.00028 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.73 on 85 degrees of freedom
## Multiple R-squared:  0.144,  Adjusted R-squared:  0.134 
## F-statistic: 14.4 on 1 and 85 DF,  p-value: 0.000282

# Adjusted model (imc, continuous edad, sexo1) plus the imc:edad_cat
# interaction.
# NOTE(review): edad_cat enters only through the interaction, without its own
# main effect (unlike modelo4) — confirm this is intended, since the
# interaction coefficient is then not comparable with the one in modelo4.
modelo5 <- lm(gluc ~ imc+edad+sexo1+ imc: edad_cat, data = dm)
confint(modelo5)

##                     2.5 % 97.5 %
## (Intercept)       70.3915 91.669
## imc                0.3674  0.807
## edad              -0.0433  0.294
## sexo11            -5.4203 -0.980
## imc:edad_catviejo -0.1176  0.159

tab_model(modelo5)

	gluc
Predictors	Estimates	CI	p
(Intercept)	81.03	70.39 – 91.67	<0.001
imc	0.59	0.37 – 0.81	<0.001
edad	0.13	-0.04 – 0.29	0.144
sexo1 [1]	-3.20	-5.42 – -0.98	0.005
imc × edad catviejo	0.02	-0.12 – 0.16	0.769
Observations	247
R² / R² adjusted	0.221 / 0.208

summary(modelo5)

## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1 + imc:edad_cat, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.963  -5.771   0.993   6.205  26.049 
## 
## Coefficients:
##                   Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)        81.0304     5.4009   15.00 < 0.0000000000000002 ***
## imc                 0.5871     0.1115    5.26           0.00000031 ***
## edad                0.1254     0.0857    1.46               0.1444    
## sexo11             -3.2002     1.1271   -2.84               0.0049 ** 
## imc:edad_catviejo   0.0207     0.0702    0.29               0.7685    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.63 on 242 degrees of freedom
## Multiple R-squared:  0.221,  Adjusted R-squared:  0.208 
## F-statistic: 17.1 on 4 and 242 DF,  p-value: 0.00000000000221

# Adjusted model without interaction: gluc on imc, edad and sexo1 — the same
# formula and data as modelo2. It is the model the diagnostics below are run on.

modelo6<- lm(gluc ~ imc+edad+sexo1, data = dm)
confint(modelo6)

##              2.5 % 97.5 %
## (Intercept) 72.243 87.623
## imc          0.398  0.802
## edad         0.047  0.245
## sexo11      -5.433 -1.009

tab_model(modelo6)

	gluc
Predictors	Estimates	CI	p
(Intercept)	79.93	72.24 – 87.62	<0.001
imc	0.60	0.40 – 0.80	<0.001
edad	0.15	0.05 – 0.24	0.004
sexo1 [1]	-3.22	-5.43 – -1.01	0.004
Observations	247
R² / R² adjusted	0.220 / 0.211

summary(modelo6)

## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.131  -5.779   0.886   6.233  26.335 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  79.9332     3.9040   20.47 < 0.0000000000000002 ***
## imc           0.6000     0.1024    5.86          0.000000015 ***
## edad          0.1459     0.0502    2.91               0.0040 ** 
## sexo11       -3.2209     1.1228   -2.87               0.0045 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.61 on 243 degrees of freedom
## Multiple R-squared:  0.22,   Adjusted R-squared:  0.211 
## F-statistic: 22.9 on 3 and 243 DF,  p-value: 0.000000000000438

# Linearity check ----
# Fitted values, raw residuals and standardized residuals of modelo6. They are
# computed once here and reused by every plot below instead of being
# re-extracted from the model object each time.
predichos <- fitted.values(modelo6)
res <- residuals(modelo6)
standres <- rstandard(modelo6)

# Residuals vs fitted values, with a horizontal reference line at zero.
plot(x = predichos, y = res, xlab = "Valores predichos", ylab = "Residuos")
abline(h = 0)

# Added-variable plots (car::avPlots), one panel per term of modelo6.
avPlots(modelo6)

# Normality of the residuals ----
# Explicit labels are given so the axes do not show raw R expressions.
densityPlot(res, xlab = "Residuos")

hist(res, main = "Histograma de residuos", xlab = "Residuos")

boxplot(res, ylab = "Residuos")

qqPlot(standres) # standardized residuals are used for the QQ plot

## [1] 109  19

describe(modelo6$residuals)

##    vars   n mean   sd median trimmed  mad   min  max range  skew kurtosis   se
## X1    1 247    0 8.56   0.89    0.19 8.85 -27.1 26.3  53.5 -0.22     0.18 0.54

shapiro.test(modelo6$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  modelo6$residuals
## W = 1, p-value = 0.2

#test de Breusch Pagan de homocedasticidad
bptest(modelo6)

## 
##  studentized Breusch-Pagan test
## 
## data:  modelo6
## BP = 1, df = 3, p-value = 0.7

#Independencia: test de Durbin-Watson

dwtest(modelo6)

## 
##  Durbin-Watson test
## 
## data:  modelo6
## DW = 2, p-value = 0.6
## alternative hypothesis: true autocorrelation is greater than 0

#multicolinealidad
vif(modelo6)

##   imc  edad sexo1 
##  1.05  1.06  1.05

library(GGally)

## Warning: package 'GGally' was built under R version 4.4.3

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## 
## Adjuntando el paquete: 'GGally'

## The following object is masked from 'package:emmeans':
## 
##     pigs

# Pairwise scatterplot / correlation matrix of the outcome (gluc) and the
# covariates sexo, edad and imc. sexo is passed as a factor.
vars <- dm[c("gluc", "sexo", "edad", "imc")]
vars[["sexo"]] <- as.factor(vars[["sexo"]])
ggpairs(data = vars, title = "Matriz de correlaciones y dispersión")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Outliers: standardized residuals of modelo6 against its fitted values, with
# reference lines at -3 and +3, then the observations beyond those limits.
standres <- rstandard(modelo6)
predichos <- fitted(modelo6)
plot(predichos, standres)
abline(h = c(-3, 3))

which(abs(standres) > 3)

##  19 109 223 
##  19 109 223

standres[standres >3]

##  223 
## 3.08

standres[standres < -3]

##    19   109 
## -3.17 -3.17

# Influential observations: hat values (leverage) and Cook's distance of
# modelo6.
leverage <- as.data.frame(hatvalues(modelo6))
cooksd <- cooks.distance(modelo6)
# Show the observations whose Cook's distance exceeds 4/n. n is taken from the
# number of distances instead of being hard-coded, so the cut-off stays right
# if the model is refitted on a different number of rows.
cooksd[cooksd > (4 / length(cooksd))]

##     14     19     46     64    109    129    183    196    202    215    223 
## 0.0206 0.0736 0.0185 0.0326 0.0309 0.0235 0.0190 0.0426 0.0169 0.0190 0.0335 
##    245 
## 0.0210

# Leverage (hat values) of modelo6
leverage <- hatvalues(modelo6)

# Cook's distance of modelo6
cooksd <- cooks.distance(modelo6)

# Usual cut-off for Cook's distance: 4 / n
umbral_cook <- 4 / length(cooksd)

# Positions of the observations whose Cook's distance exceeds the cut-off
influyentes <- which(cooksd > umbral_cook)

# Summary table: one row per influential observation, with its position,
# leverage and Cook's distance
tabla_influyentes <- data.frame(
  fila = influyentes,
  leverage = leverage[influyentes],
  distancia_cook = cooksd[influyentes]
)

# Show the table
print(tabla_influyentes)

##     fila leverage distancia_cook
## 14    14   0.0279         0.0206
## 19    19   0.0285         0.0736
## 46    46   0.0142         0.0185
## 64    64   0.0267         0.0326
## 109  109   0.0122         0.0309
## 129  129   0.0192         0.0235
## 183  183   0.0157         0.0190
## 196  196   0.0256         0.0426
## 202  202   0.0186         0.0169
## 215  215   0.0199         0.0190
## 223  223   0.0139         0.0335
## 245  245   0.0197         0.0210

# Leverage (hat values) of modelo6
leverage <- hatvalues(modelo6)

# Cook's distance of modelo6
cooks <- cooks.distance(modelo6)

# Outliers: observations whose standardized residual is beyond +/- 3
# (standres and predichos come from the outlier plot section above)
outlier_indices <- which(abs(standres) > 3)

# Summary table of the outliers. The response column is looked up by name
# from the model formula (first variable of the formula) rather than from the
# unevaluated call, which only works when the formula was typed inline in lm().
outliers_tabla <- data.frame(
  fila = outlier_indices,
  valor_real = dm[outlier_indices, all.vars(formula(modelo6))[1]],
  predicho = predichos[outlier_indices],
  residuo_estandarizado = standres[outlier_indices],
  leverage = leverage[outlier_indices],
  distancia_cook = cooks[outlier_indices]
)

# Show the table
print(outliers_tabla)

##     fila gluc predicho residuo_estandarizado leverage distancia_cook
## 19    19   78      105                 -3.17   0.0285         0.0736
## 109  109   74      101                 -3.17   0.0122         0.0309
## 223  223  135      109                  3.08   0.0139         0.0335

# The model is refitted without observation 19 and, separately, without
# observation 196. This is the data set without row 19.
SIN_19 <- dplyr::slice(dm, -19)

confint(modelo6)

##              2.5 % 97.5 %
## (Intercept) 72.243 87.623
## imc          0.398  0.802
## edad         0.047  0.245
## sexo11      -5.433 -1.009

tab_model(modelo6)

	gluc
Predictors	Estimates	CI	p
(Intercept)	79.93	72.24 – 87.62	<0.001
imc	0.60	0.40 – 0.80	<0.001
edad	0.15	0.05 – 0.24	0.004
sexo1 [1]	-3.22	-5.43 – -1.01	0.004
Observations	247
R² / R² adjusted	0.220 / 0.211

summary(modelo6)

## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = dm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.131  -5.779   0.886   6.233  26.335 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  79.9332     3.9040   20.47 < 0.0000000000000002 ***
## imc           0.6000     0.1024    5.86          0.000000015 ***
## edad          0.1459     0.0502    2.91               0.0040 ** 
## sexo11       -3.2209     1.1228   -2.87               0.0045 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.61 on 243 degrees of freedom
## Multiple R-squared:  0.22,   Adjusted R-squared:  0.211 
## F-statistic: 22.9 on 3 and 243 DF,  p-value: 0.000000000000438

# Refit the adjusted model on the data without row 19 (SIN_19).
modelo9 <- lm(formula = gluc ~ imc + edad + sexo1, data = SIN_19)
confint(modelo9)

##              2.5 % 97.5 %
## (Intercept) 71.202 86.357
## imc          0.396  0.792
## edad         0.071  0.267
## sexo11      -5.098 -0.743

tab_model(modelo9)

	gluc
Predictors	Estimates	CI	p
(Intercept)	78.78	71.20 – 86.36	<0.001
imc	0.59	0.40 – 0.79	<0.001
edad	0.17	0.07 – 0.27	0.001
sexo1 [1]	-2.92	-5.10 – -0.74	0.009
Observations	246
R² / R² adjusted	0.231 / 0.221

summary(modelo9)

## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = SIN_19)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.122  -5.740   0.782   6.112  26.466 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  78.7795     3.8470   20.48 < 0.0000000000000002 ***
## imc           0.5942     0.1005    5.91          0.000000011 ***
## edad          0.1690     0.0498    3.40               0.0008 ***
## sexo11       -2.9202     1.1055   -2.64               0.0088 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.45 on 242 degrees of freedom
## Multiple R-squared:  0.231,  Adjusted R-squared:  0.221 
## F-statistic: 24.2 on 3 and 242 DF,  p-value: 0.0000000000000964

# Drop row 196 and refit the adjusted model on the remaining rows.
SIN_196 <- dplyr::slice(dm, -196)
modelo10 <- lm(formula = gluc ~ imc + edad + sexo1, data = SIN_196)
confint(modelo10)

##               2.5 % 97.5 %
## (Intercept) 72.5552 87.764
## imc          0.3730  0.774
## edad         0.0605  0.257
## sexo11      -5.5906 -1.209

tab_model(modelo10)

	gluc
Predictors	Estimates	CI	p
(Intercept)	80.16	72.56 – 87.76	<0.001
imc	0.57	0.37 – 0.77	<0.001
edad	0.16	0.06 – 0.26	0.002
sexo1 [1]	-3.40	-5.59 – -1.21	0.002
Observations	246
R² / R² adjusted	0.226 / 0.216

summary(modelo10)

## 
## Call:
## lm(formula = gluc ~ imc + edad + sexo1, data = SIN_196)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.198  -5.766   0.952   5.985  26.357 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  80.1597     3.8605   20.76 < 0.0000000000000002 ***
## imc           0.5734     0.1018    5.63          0.000000049 ***
## edad          0.1588     0.0499    3.18               0.0016 ** 
## sexo11       -3.3998     1.1122   -3.06               0.0025 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.51 on 242 degrees of freedom
## Multiple R-squared:  0.226,  Adjusted R-squared:  0.216 
## F-statistic: 23.5 on 3 and 242 DF,  p-value: 0.000000000000218

# Comparison tables of the coefficients with and without observations 19 and 196
# Coefficients of the model on all rows (modelo6) and of the refit without
# row 19 (modelo9)
coef_ori <- coef(modelo6)
coef_sin <- coef(modelo9)

# One row per coefficient: original estimate, estimate without row 19, and the
# percent change relative to the original estimate
comparacion <- data.frame(
  Coeficiente = names(coef_ori),
  Original = coef_ori,
  Sin_19 = coef_sin,
  Cambio_pct = 100 * (coef_sin - coef_ori) / coef_ori
)

# Show the table
print(comparacion)

##             Coeficiente Original Sin_19 Cambio_pct
## (Intercept) (Intercept)   79.933 78.780     -1.443
## imc                 imc    0.600  0.594     -0.962
## edad               edad    0.146  0.169     15.842
## sexo11           sexo11   -3.221 -2.920     -9.335

# Coefficients of the model on all rows (modelo6) and of the refit without
# row 196 (modelo10)
coef_ori1 <- coef(modelo6)
coef_sin1 <- coef(modelo10)

# One row per coefficient: original estimate, estimate without row 196, and
# the percent change relative to the original estimate. The labels are taken
# from coef_ori1 itself so this table does not depend on a vector created for
# the row-19 comparison.
comparacion1 <- data.frame(
  Coeficiente = names(coef_ori1),
  Original = coef_ori1,
  Sin_196 = coef_sin1,
  Cambio_pct = 100 * (coef_sin1 - coef_ori1) / coef_ori1
)

# Show the table
print(comparacion1)

##             Coeficiente Original Sin_196 Cambio_pct
## (Intercept) (Intercept)   79.933  80.160      0.283
## imc                 imc    0.600   0.573     -4.430
## edad               edad    0.146   0.159      8.825
## sexo11           sexo11   -3.221  -3.400      5.556

Ejercicio2

2025-04-08