library(readr)
## Warning: package 'readr' was built under R version 4.5.3
# Load the insurance dataset from a local CSV file; readr infers the column
# types and reports them in the column specification message.
insurance <- read_csv(
  file = "C:/Users/MINEDUCYT/Downloads/insurance.csv"
)
## Rows: 1338 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): sex, smoker, region
## dbl (4): age, bmi, children, charges
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens an interactive data viewer, so it is only called in an
# interactive session. Non-interactive runs (Rscript, knitting) skip it instead
# of trying to open a viewer window, which may fail there.
if (interactive()) {
  View(insurance)
}
library(stargazer)
# Fit an OLS model of insurance charges on the six other columns of the data.
modelo_seguro <- lm(
  formula = charges ~ age + sex + bmi + children + smoker + region,
  data = insurance
)

# Print the regression table as plain text.
stargazer::stargazer(
  modelo_seguro,
  title = "Resultados del modelo de regresión lineal",
  type = "text"
)
## 
## Resultados del modelo de regresión lineal
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               charges          
## -----------------------------------------------
## age                         256.856***         
##                              (11.899)          
##                                                
## sexmale                      -131.314          
##                              (332.945)         
##                                                
## bmi                         339.193***         
##                              (28.599)          
##                                                
## children                    475.501***         
##                              (137.804)         
##                                                
## smokeryes                  23,848.530***       
##                              (413.153)         
##                                                
## regionnorthwest              -352.964          
##                              (476.276)         
##                                                
## regionsoutheast            -1,035.022**        
##                              (478.692)         
##                                                
## regionsouthwest             -960.051**         
##                              (477.933)         
##                                                
## Constant                  -11,938.540***       
##                              (987.819)         
##                                                
## -----------------------------------------------
## Observations                   1,338           
## R2                             0.751           
## Adjusted R2                    0.749           
## Residual Std. Error    6,062.102 (df = 1329)   
## F Statistic          500.811*** (df = 8; 1329) 
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
library(equatiomatic)
## Warning: package 'equatiomatic' was built under R version 4.5.3
## 
## Adjuntando el paquete: 'equatiomatic'
## The following object is masked from 'package:datasets':
## 
##     penguins
# Optional: render the model's equation as LaTeX, wrapped across several lines.
extract_eq(
  modelo_seguro,
  wrap = TRUE
)

\[ \begin{aligned} \operatorname{charges} &= \alpha + \beta_{1}(\operatorname{age}) + \beta_{2}(\operatorname{sex}_{\operatorname{male}}) + \beta_{3}(\operatorname{bmi})\ + \\ &\quad \beta_{4}(\operatorname{children}) + \beta_{5}(\operatorname{smoker}_{\operatorname{yes}}) + \beta_{6}(\operatorname{region}_{\operatorname{northwest}}) + \beta_{7}(\operatorname{region}_{\operatorname{southeast}})\ + \\ &\quad \beta_{8}(\operatorname{region}_{\operatorname{southwest}}) + \epsilon \end{aligned} \]

# 95% confidence intervals for every coefficient of the fitted model.
confint(object = modelo_seguro, level = 0.95)
##                       2.5 %       97.5 %
## (Intercept)     -13876.3934 -10000.68373
## age                233.5138    280.19893
## sexmale           -784.4703    521.84155
## bmi                283.0884    395.29848
## children           205.1633    745.83780
## smokeryes        23038.0307  24659.03838
## regionnorthwest  -1287.2982    581.37040
## regionsoutheast  -1974.0968    -95.94733
## regionsouthwest  -1897.6364    -22.46560

Prueba Cp de Mallows

incluyendo todas las variables

# 1. Cargar libreria
library(stargazer)

# 2. Full model: charges explained by all six predictors.
modelo_completo <- lm(
  formula = charges ~ age + sex + bmi + children + smoker + region,
  data = insurance
)

# 3. Reduced model: the same specification without sex and region.
modelo_reducido <- lm(
  formula = charges ~ age + bmi + children + smoker,
  data = insurance
)

# 4. Mallows' Cp for a candidate model, measured against a full model.
#
# Cp = SSE_p / MSE_full - n + 2 * p, where SSE_p is the candidate's residual
# sum of squares, MSE_full is the full model's residual variance estimate, n is
# the number of residuals of the candidate and p is its number of estimated
# coefficients (intercept included).
#
# Args:
#   modelo_evaluar: fitted model to score; must support residuals() and coef()
#     (e.g. an lm fit).
#   modelo_full: fitted full model whose summary() exposes `sigma`.
#
# Returns: the Cp value rounded to 2 decimals.
calcular_cp <- function(modelo_evaluar, modelo_full) {
  residuos <- residuals(modelo_evaluar)
  sse_p <- sum(residuos^2)
  mse_full <- summary(modelo_full)$sigma^2
  n <- length(residuos)
  # Count only coefficients that were actually estimated: lm() reports aliased
  # (perfectly collinear) terms as NA, and those must not inflate the penalty.
  p <- sum(!is.na(coef(modelo_evaluar)))

  round(sse_p / mse_full - n + 2 * p, 2)
}

# 5. Cp of each model, both scored against the full model's error variance.
cp_reducido <- calcular_cp(
  modelo_evaluar = modelo_reducido,
  modelo_full = modelo_completo
)
cp_completo <- calcular_cp(
  modelo_evaluar = modelo_completo,
  modelo_full = modelo_completo
)

# 6. Side-by-side comparison table with an extra row for Mallows' Cp.
# The row label avoids "_": with type = "text" the label "C_p de Mallows" was
# printed as "Cde Mallows", losing the "_p".
stargazer(
  modelo_reducido, modelo_completo,
  type = "text",
  title = "Prueba de seleccion de variables (Sin Sexo y Region)",
  add.lines = list(c("Cp de Mallows", cp_reducido, cp_completo))
)
## 
## Prueba de seleccion de variables (Sin Sexo y Region)
## =======================================================================
##                                     Dependent variable:                
##                     ---------------------------------------------------
##                                           charges                      
##                                (1)                       (2)           
## -----------------------------------------------------------------------
## age                        257.850***                256.856***        
##                             (11.896)                  (11.899)         
##                                                                        
## sexmale                                               -131.314         
##                                                       (332.945)        
##                                                                        
## bmi                        321.851***                339.193***        
##                             (27.378)                  (28.599)         
##                                                                        
## children                   473.502***                475.501***        
##                             (137.792)                 (137.804)        
##                                                                        
## smokeryes                 23,811.400***             23,848.530***      
##                             (411.220)                 (413.153)        
##                                                                        
## regionnorthwest                                       -352.964         
##                                                       (476.276)        
##                                                                        
## regionsoutheast                                     -1,035.022**       
##                                                       (478.692)        
##                                                                        
## regionsouthwest                                      -960.051**        
##                                                       (477.933)        
##                                                                        
## Constant                 -12,102.770***            -11,938.540***      
##                             (941.984)                 (987.819)        
##                                                                        
## -----------------------------------------------------------------------
## Cde Mallows                    7.5                        9            
## Observations                  1,338                     1,338          
## R2                            0.750                     0.751          
## Adjusted R2                   0.749                     0.749          
## Residual Std. Error   6,067.787 (df = 1333)     6,062.102 (df = 1329)  
## F Statistic         998.123*** (df = 4; 1333) 500.811*** (df = 8; 1329)
## =======================================================================
## Note:                                       *p<0.1; **p<0.05; ***p<0.01