#EJERCICIOS FINALES II

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rio)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
# Load the workbook "data.xlsx" into `data`; every later step reads from it.
data <- import(file = "data.xlsx")
## New names:
## • `` -> `...1`

##PREGUNTA 1

# select() is called without any column arguments, so no columns are kept.
# NOTE(review): confirm which columns this question is meant to keep.
data_1 <- select(data)

##PREGUNTA 2

# Name the first column "numero" so rows can be filtered by it below.
colnames(data)[1] <- "numero"

# Percentage of dwellings: `elec1_Sí` as a share of `elec3_Total`, times 100.
data$porc_vivSIelec <- (data$elec1_Sí / data$elec3_Total) * 100

# Ratio of the Castillo vote to the Keiko vote.
data$razonck <- data$Castillo / data$Keiko

# Deaths per 1000 positive cases.
data$tfx1000 <- (data$countFallecidos / data$countPositivos) * 1000

# Keep only the three derived indicators, dropping the rows whose `numero`
# is between 128 and 137 (inclusive).
dataP1 <- data |>
  filter(!numero %in% 128:137) |>
  select(porc_vivSIelec, razonck, tfx1000)

##EJERCICIO 3

# Recode the 0/1 outcome as a factor labelled "No" (0) / "Sí" (1).
data$ganaCastillo <- factor(
  as.factor(data$ganaCastillo),
  levels = c(0L, 1L),
  labels = c("No", "Sí")
)

# Fix the random seed for the rest of the script.
set.seed(seed = 2019)

# First hypothesis: the outcome `ganaCastillo` is modelled from
# `porc_vivSIelec` and `tfx1000`.
h1 <- ganaCastillo ~ porc_vivSIelec + tfx1000

# Logistic regression (binomial family) fitted on the full data set.
rlog1 <- glm(formula = h1, family = binomial, data = data)

# Render the fitted model as a kableExtra table with significance stars.
modelrl <- list("Gana Castillo" = rlog1)
modelsummary(
  modelrl,
  output = "kableExtra",
  stars = TRUE,
  title = "Regresión Logística"
)
## Regresión Logística
##
##                  Gana Castillo
## (Intercept)      6.232***
##                  (1.657)
## porc_vivSIelec   -0.063**
##                  (0.020)
## tfx1000          0.000
##                  (0.001)
## Num.Obs.         196
## AIC              199.5
## BIC              209.4
## Log.Lik.         -96.760
## F                5.026
## RMSE             0.39
## + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
# `agua1_Red` as a percentage of `agua10_Total`.
data$porc_aguared <- 100 * (data$agua1_Red / data$agua10_Total)

# Second hypothesis: the first model's predictors plus `porc_aguared`.
h2 <- ganaCastillo ~ porc_vivSIelec + tfx1000 + porc_aguared

# Logistic regression (binomial family) for the extended specification.
rlog2 <- glm(formula = h2, family = binomial, data = data)

# Show both fitted models side by side in one kableExtra table.
modelrl2 <- list("Hipotesis1" = rlog1, "Hipotesis2" = rlog2)
modelsummary(
  modelrl2,
  output = "kableExtra",
  stars = TRUE,
  title = "Regresión Logística"
)
## Regresión Logística
##
##                  Hipotesis1   Hipotesis2
## (Intercept)      6.232***     7.226***
##                  (1.657)      (1.888)
## porc_vivSIelec   -0.063**     -0.088**
##                  (0.020)      (0.029)
## tfx1000          0.000        0.000
##                  (0.001)      (0.001)
## porc_aguared                  0.019
##                               (0.015)
## Num.Obs.         196          196
## AIC              199.5        200.0
## BIC              209.4        213.1
## Log.Lik.         -96.760      -95.994
## F                5.026        3.681
## RMSE             0.39         0.39
## + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001