library(readr)
## Warning: package 'readr' was built under R version 4.5.3
# Read the insurance CSV; readr guesses the column types and reports them.
# NOTE(review): hard-coded absolute Windows path - it will not resolve on
# another machine unless the same file exists at the same location.
insurance <- read_csv(
  file = "C:/Users/MINEDUCYT/Downloads/insurance.csv"
)
## Rows: 1338 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): sex, smoker, region
## dbl (4): age, bmi, children, charges
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style data viewer only when a person is driving the
# session; a non-interactive render has nobody to look at the viewer, and the
# call adds nothing to the report.
if (interactive()) {
  View(insurance)
}
library(stargazer)
# Fit a linear regression of charges on the six remaining columns.
modelo_seguro <- lm(
  formula = charges ~ age + sex + bmi + children + smoker + region,
  data = insurance
)
# Print the coefficient table as plain text.
stargazer::stargazer(
  modelo_seguro,
  title = "Resultados del modelo de regresión lineal",
  type = "text"
)
##
## Resultados del modelo de regresión lineal
## ===============================================
## Dependent variable:
## ---------------------------
## charges
## -----------------------------------------------
## age 256.856***
## (11.899)
##
## sexmale -131.314
## (332.945)
##
## bmi 339.193***
## (28.599)
##
## children 475.501***
## (137.804)
##
## smokeryes 23,848.530***
## (413.153)
##
## regionnorthwest -352.964
## (476.276)
##
## regionsoutheast -1,035.022**
## (478.692)
##
## regionsouthwest -960.051**
## (477.933)
##
## Constant -11,938.540***
## (987.819)
##
## -----------------------------------------------
## Observations 1,338
## R2 0.751
## Adjusted R2 0.749
## Residual Std. Error 6,062.102 (df = 1329)
## F Statistic 500.811*** (df = 8; 1329)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
library(equatiomatic)
## Warning: package 'equatiomatic' was built under R version 4.5.3
##
## Adjuntando el paquete: 'equatiomatic'
## The following object is masked from 'package:datasets':
##
## penguins
# Optional: emit the model equation as LaTeX, wrapped across several lines.
extract_eq(modelo_seguro, wrap = TRUE)
\[ \begin{aligned} \operatorname{charges} &= \alpha + \beta_{1}(\operatorname{age}) + \beta_{2}(\operatorname{sex}_{\operatorname{male}}) + \beta_{3}(\operatorname{bmi})\ + \\ &\quad \beta_{4}(\operatorname{children}) + \beta_{5}(\operatorname{smoker}_{\operatorname{yes}}) + \beta_{6}(\operatorname{region}_{\operatorname{northwest}}) + \beta_{7}(\operatorname{region}_{\operatorname{southeast}})\ + \\ &\quad \beta_{8}(\operatorname{region}_{\operatorname{southwest}}) + \epsilon \end{aligned} \]
# 95% confidence interval for each coefficient of the fitted model.
confint(object = modelo_seguro, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -13876.3934 -10000.68373
## age 233.5138 280.19893
## sexmale -784.4703 521.84155
## bmi 283.0884 395.29848
## children 205.1633 745.83780
## smokeryes 23038.0307 24659.03838
## regionnorthwest -1287.2982 581.37040
## regionsoutheast -1974.0968 -95.94733
## regionsouthwest -1897.6364 -22.46560
# 1. Cargar libreria
library(stargazer)
# 2. FULL model: every available predictor.
modelo_completo <- lm(
  formula = charges ~ age + sex + bmi + children + smoker + region,
  data = insurance
)
# 3. REDUCED model: the same specification without sex and region.
modelo_reducido <- lm(
  formula = charges ~ age + bmi + children + smoker,
  data = insurance
)
# 4. Mallows' Cp for a candidate model.
#
# Computes Cp = SSE_p / MSE_full - n + 2 * p, rounded to two decimals, where
#   SSE_p    is the residual sum of squares of the candidate model,
#   MSE_full is the residual variance estimate (sigma^2) of the full model,
#   n        is the number of residuals of the candidate model,
#   p        is the number of coefficients actually estimated in the candidate.
#
# Arguments:
#   modelo_evaluar  fitted model to evaluate (anything supporting residuals()
#                   and coef(), e.g. an lm object).
#   modelo_full     fitted reference model whose summary() exposes $sigma.
#
# Returns: a single numeric value (Cp rounded to 2 decimals).
# Raises:  an error when the two models have a different number of residuals,
#          because the statistic is only meaningful on the same observations.
calcular_cp <- function(modelo_evaluar, modelo_full) {
  residuos <- residuals(modelo_evaluar)
  n <- length(residuos)
  if (n != length(residuals(modelo_full))) {
    stop(
      "Both models must be fitted to the same observations.",
      call. = FALSE
    )
  }
  sse_p <- sum(residuos^2)
  mse_full <- summary(modelo_full)$sigma^2
  # Aliased (perfectly collinear) terms show up as NA in coef(); they are not
  # estimated parameters, so they must not be counted in p.
  p <- sum(!is.na(coef(modelo_evaluar)))
  cp <- (sse_p / mse_full) - n + (2 * p)
  round(cp, 2)
}
# 5. Mallows' Cp of each candidate, always measured against the full model.
cp_completo <- calcular_cp(
  modelo_evaluar = modelo_completo,
  modelo_full = modelo_completo
)
cp_reducido <- calcular_cp(
  modelo_evaluar = modelo_reducido,
  modelo_full = modelo_completo
)
# 6. Side-by-side comparison table (reduced vs. full) with an extra row
# holding each model's Mallows' Cp.
# The row label deliberately contains no underscore: with type = "text" the
# label "C_p de Mallows" was rendered as "Cde Mallows".
stargazer(modelo_reducido, modelo_completo,
          type = "text",
          title = "Prueba de seleccion de variables (Sin Sexo y Region)",
          add.lines = list(c("Cp de Mallows", cp_reducido, cp_completo)))
##
## Prueba de seleccion de variables (Sin Sexo y Region)
## =======================================================================
## Dependent variable:
## ---------------------------------------------------
## charges
## (1) (2)
## -----------------------------------------------------------------------
## age 257.850*** 256.856***
## (11.896) (11.899)
##
## sexmale -131.314
## (332.945)
##
## bmi 321.851*** 339.193***
## (27.378) (28.599)
##
## children 473.502*** 475.501***
## (137.792) (137.804)
##
## smokeryes 23,811.400*** 23,848.530***
## (411.220) (413.153)
##
## regionnorthwest -352.964
## (476.276)
##
## regionsoutheast -1,035.022**
## (478.692)
##
## regionsouthwest -960.051**
## (477.933)
##
## Constant -12,102.770*** -11,938.540***
## (941.984) (987.819)
##
## -----------------------------------------------------------------------
## Cde Mallows 7.5 9
## Observations 1,338 1,338
## R2 0.750 0.751
## Adjusted R2 0.749 0.749
## Residual Std. Error 6,067.787 (df = 1333) 6,062.102 (df = 1329)
## F Statistic 998.123*** (df = 4; 1333) 500.811*** (df = 8; 1329)
## =======================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01