#Abrir data

library(rio) 
# Read the Excel workbook into `data` and list its column names.
data <- import("DATA.xlsx")
names(data)
## [1] "DISTRITO"       "anemia_ninos"   "numero_medicos" "primary_HCF"   
## [5] "sis"            "altura_m"

# What types of variables do I have? str() prints the structure of `data`:
# one line per column with its type and first few values.

str(data)
## 'data.frame':    110 obs. of  6 variables:
##  $ DISTRITO      : chr  "ACHAYA" "ARAPA" "ASILLO" "AZANGARO" ...
##  $ anemia_ninos  : num  2.3 4.3 9.8 17 15.9 12.2 20.6 6.8 14 7 ...
##  $ numero_medicos: num  3.99 4.42 5.82 15.02 7.19 ...
##  $ primary_HCF   : num  126 156 174 173 86 116 133 180 176 149 ...
##  $ sis           : num  0.986 0.981 0.944 0.883 0.962 ...
##  $ altura_m      : num  3844 3848 3925 3880 3860 ...

# Hypotheses to test: start with a single predictor and add one more
# predictor at each step. The response is always anemia_ninos.

hipotesis1 <- anemia_ninos ~ numero_medicos
hipotesis2 <- anemia_ninos ~ numero_medicos + primary_HCF
hipotesis3 <- anemia_ninos ~ numero_medicos + primary_HCF + sis
hipotesis4 <- anemia_ninos ~ numero_medicos + primary_HCF + sis + altura_m

# Fit one linear model per hypothesis (four in total), all on the same data.

test1 <- lm(formula = hipotesis1, data = data)
test2 <- lm(formula = hipotesis2, data = data)
test3 <- lm(formula = hipotesis3, data = data)
test4 <- lm(formula = hipotesis4, data = data)

# Summary of the 1st hypothesis: only the dependent variable (DV) and a
# single independent variable (numero_medicos) are considered here.

summary(test1)
## 
## Call:
## lm(formula = hipotesis1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.860  -6.501  -2.267   4.141  43.407 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      9.5978     1.5457   6.209 1.01e-08 ***
## numero_medicos   0.1608     0.1588   1.012    0.314    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.71 on 108 degrees of freedom
## Multiple R-squared:  0.009401,   Adjusted R-squared:  0.0002292 
## F-statistic: 1.025 on 1 and 108 DF,  p-value: 0.3136

# 2nd hypothesis: DV regressed on numero_medicos + primary_HCF.

summary(test2)
## 
## Call:
## lm(formula = hipotesis2, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.852  -6.516  -2.458   3.881  44.445 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.697629   2.018885   3.813  0.00023 ***
## numero_medicos 0.164293   0.158003   1.040  0.30077    
## primary_HCF    0.010516   0.007239   1.453  0.14926    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.66 on 107 degrees of freedom
## Multiple R-squared:  0.02856,    Adjusted R-squared:  0.0104 
## F-statistic: 1.573 on 2 and 107 DF,  p-value: 0.2122

# 3rd hypothesis: DV regressed on numero_medicos + primary_HCF + sis.

summary(test3)
## 
## Call:
## lm(formula = hipotesis3, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.751  -6.336  -1.958   3.398  42.436 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     35.856175  17.768887   2.018   0.0461 *
## numero_medicos   0.154126   0.157004   0.982   0.3285  
## primary_HCF      0.007752   0.007393   1.049   0.2968  
## sis            -29.680254  18.609578  -1.595   0.1137  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.591 on 106 degrees of freedom
## Multiple R-squared:  0.05132,    Adjusted R-squared:  0.02447 
## F-statistic: 1.912 on 3 and 106 DF,  p-value: 0.1321

# 4th hypothesis: DV regressed on numero_medicos + primary_HCF + sis + altura_m.

summary(test4)
## 
## Call:
## lm(formula = hipotesis4, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.287  -5.168  -1.313   3.815  24.020 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    53.557226  13.790340   3.884  0.00018 ***
## numero_medicos  0.183370   0.120551   1.521  0.13124    
## primary_HCF     0.007710   0.005675   1.359  0.17718    
## sis            -7.291659  14.515405  -0.502  0.61648    
## altura_m       -0.010326   0.001193  -8.657 6.21e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.361 on 105 degrees of freedom
## Multiple R-squared:  0.4464, Adjusted R-squared:  0.4253 
## F-statistic: 21.17 on 4 and 105 DF,  p-value: 8.032e-13

# Here we choose which equation (hypothesis) is best by comparing the four
# fits in sequence: each row of the table tests a model against the previous
# one. The F statistics use the residual mean square of the largest model
# (test4) as denominator, so these p-values can differ from the t-test
# p-value of the same added term in the intermediate summaries.

mejorRegre <- anova(test1, test2, test3, test4)
mejorRegre
## Analysis of Variance Table
## 
## Model 1: anemia_ninos ~ numero_medicos
## Model 2: anemia_ninos ~ numero_medicos + primary_HCF
## Model 3: anemia_ninos ~ numero_medicos + primary_HCF + sis
## Model 4: anemia_ninos ~ numero_medicos + primary_HCF + sis + altura_m
##   Res.Df     RSS Df Sum of Sq       F   Pr(>F)    
## 1    108 10181.6                                  
## 2    107  9984.7  1     196.9  3.6336  0.05936 .  
## 3    106  9750.8  1     234.0  4.3180  0.04015 *  
## 4    105  5689.8  1    4061.0 74.9413 6.21e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient plot of the four fitted models side by side. The intercept is
# hidden and a dashed vertical line marks zero as the reference.
library(dotwhisker)
## Warning: package 'dotwhisker' was built under R version 4.6.1
## Cargando paquete requerido: ggplot2
dwplot(
  list(test1, test2, test3, test4),
  show_intercept = FALSE
) +
  geom_vline(xintercept = 0, linetype = "dashed", linewidth = 0.5) +
  labs(title = "Coefficient Plot")