#PREGUNTA 1 - MORTALIDAD MATERNA

library(rio)
lkXLSX="https://docs.google.com/spreadsheets/d/1s2Ne9uQ7nwTvSo6ckNQHV35NZt-OYs8V/edit#gid=1446884572"
data1=import(lkXLSX)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data1 <- data1 %>% filter(!is.na(TasaFertil1mMuje) & TasaFertil1mMuje != "")
library(tidyverse)
data1 <- data1 %>% filter(!is.na(EmployPerPop) & EmployPerPop != "")
library(tidyverse)
data1 <- data1 %>% filter(!is.na(Tuberculosis100m) & Tuberculosis100m != "")
library(tidyverse)
data1 <- data1 %>% filter(!is.na(MaterMort100m) & MaterMort100m != "")
library(tidyverse)
data1 <- data1 %>% filter(!is.na(UndernourishPerPop) & UndernourishPerPop != "")
modeloA <- lm(MaterMort100m ~ Tuberculosis100m + EmployPerPop, data = data1)
summary(modeloA)
## 
## Call:
## lm(formula = MaterMort100m ~ Tuberculosis100m + EmployPerPop, 
##     data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -500.53  -89.79  -49.00   33.07 1044.18 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -29.99461   45.62215  -0.657   0.5117    
## Tuberculosis100m   0.73456    0.09399   7.815 4.51e-13 ***
## EmployPerPop       2.18668    0.92684   2.359   0.0194 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 185.9 on 179 degrees of freedom
## Multiple R-squared:  0.2935, Adjusted R-squared:  0.2856 
## F-statistic: 37.18 on 2 and 179 DF,  p-value: 3.141e-14
modeloB <- lm(MaterMort100m ~ TasaFertil1mMuje + EmployPerPop, data = data1)
summary(modeloB)
## 
## Call:
## lm(formula = MaterMort100m ~ TasaFertil1mMuje + EmployPerPop, 
##     data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -276.87  -71.77   -2.35   37.66  897.65 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -70.8583    33.3665  -2.124   0.0351 *  
## TasaFertil1mMuje   4.3144     0.2606  16.556   <2e-16 ***
## EmployPerPop       0.4119     0.6891   0.598   0.5508    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 135.3 on 179 degrees of freedom
## Multiple R-squared:  0.6256, Adjusted R-squared:  0.6215 
## F-statistic: 149.6 on 2 and 179 DF,  p-value: < 2.2e-16

#PREGUNTA 2 - PRESOS

library(rio)
lkXLSX="https://docs.google.com/spreadsheets/d/1AhloZIapoIhz4-qOgn3u2AXKykKoA9-fDGnhT2CGxt0/edit#gid=1573532387"
data2=import(lkXLSX)
data2$casado <- factor(data2$casado, levels = c(0, 1), labels = c("Sí", "No"))
library(survival)
data2$survival=with(data2,Surv(time = semanasLibre,event =  as.numeric(fueArrestado)))
# que es:

library(magrittr) # needed for pipe %>% 
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
data2%>%
    rmarkdown::paged_table()
library(ggplot2)
library(ggfortify)

#aqui el generico
KM.generico = survfit(survival ~ 1, data = data2)

###graficando
ejeX='SEMANAS\n curva cae cuando alguien es arrestado'
ejeY='Probabilidad \n(PERMANECER LIBRE)'
titulo="Curva de Sobrevivencia: permanecer libre"
autoplot(KM.generico,xlab=ejeX,ylab=ejeY, main = titulo,conf.int = F)

#Investigador A

modelo_A <- coxph(survival ~ edad + casado, data = data2)
summary (modelo_A)
## Call:
## coxph(formula = survival ~ edad + casado, data = data2)
## 
##   n= 432, number of events= 114 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)   
## edad     -0.06672   0.93545  0.02082 -3.205  0.00135 **
## casadoNo -0.49314   0.61070  0.37269 -1.323  0.18576   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## edad        0.9355      1.069    0.8981    0.9744
## casadoNo    0.6107      1.637    0.2942    1.2678
## 
## Concordance= 0.616  (se = 0.027 )
## Likelihood ratio test= 17.27  on 2 df,   p=2e-04
## Wald test            = 13.99  on 2 df,   p=9e-04
## Score (logrank) test = 14.51  on 2 df,   p=7e-04
aic_modelo <- AIC(modelo_A)
print(aic_modelo)
## [1] 1337.494

#Investigador B

modelo_B <- coxph(survival ~ edad * casado, data = data2)
summary (modelo_B)
## Call:
## coxph(formula = survival ~ edad * casado, data = data2)
## 
##   n= 432, number of events= 114 
## 
##                   coef exp(coef) se(coef)      z Pr(>|z|)   
## edad          -0.05857   0.94311  0.02093 -2.798  0.00514 **
## casadoNo       3.46762  32.06033  2.80648  1.236  0.21662   
## edad:casadoNo -0.16260   0.84993  0.11852 -1.372  0.17009   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## edad             0.9431    1.06032    0.9052    0.9826
## casadoNo        32.0603    0.03119    0.1309 7849.5624
## edad:casadoNo    0.8499    1.17657    0.6737    1.0722
## 
## Concordance= 0.616  (se = 0.026 )
## Likelihood ratio test= 19.76  on 3 df,   p=2e-04
## Wald test            = 12.61  on 3 df,   p=0.006
## Score (logrank) test = 14.58  on 3 df,   p=0.002
aic_modelo2 <- AIC(modelo_B)
print(aic_modelo2)
## [1] 1336.999

#PREGUNTA 4 - ADMISIÓN

library(rio)
lkXLSX="https://docs.google.com/spreadsheets/d/1qrRFEAB-jZBUUjn9XwEm8mi-l0l4mJnCWUPX6aXtpz4/edit#gid=0"
data3=import(lkXLSX)
data3$admitido=as.factor(data3$admitido)
data3$admitido=factor(data3$admitido,levels=levels(data3$admitido) ,labels=c("No","Si"))

data3$prestigio=as.factor(data3$prestigio)
data3$prestigio=factor(data3$prestigio,levels=levels(data3$prestigio),labels=c("1","2", "3", "4"), ordered = T)

str(data3)
## 'data.frame':    400 obs. of  4 variables:
##  $ admitido : Factor w/ 2 levels "No","Si": 1 2 2 2 1 2 2 1 2 1 ...
##  $ letras   : int  380 660 800 640 520 760 560 400 540 700 ...
##  $ ciencias : int  361 367 400 319 293 300 298 308 339 392 ...
##  $ prestigio: Ord.factor w/ 4 levels "1"<"2"<"3"<"4": 2 2 4 1 1 3 4 3 2 3 ...
set.seed(1)
modelo = glm(admitido ~ letras, family = binomial, data = data3)
modelor = glm(admitido ~ letras + ciencias, family = binomial, data = data3)
modelog = glm(admitido ~ letras + ciencias + prestigio, family = binomial, data = data3)

library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(modelo, modelor,modelog, type = "text")
## 
## ===============================================
##                        Dependent variable:     
##                   -----------------------------
##                             admitido           
##                      (1)       (2)       (3)   
## -----------------------------------------------
## letras            0.004***   0.003**   0.002** 
##                    (0.001)   (0.001)   (0.001) 
##                                                
## ciencias                     0.008**   0.008** 
##                              (0.003)   (0.003) 
##                                                
## prestigio.L                           1.189*** 
##                                        (0.287) 
##                                                
## prestigio.Q                             0.232  
##                                        (0.252) 
##                                                
## prestigio.C                            -0.099  
##                                        (0.212) 
##                                                
## Constant          -2.901*** -4.949*** -4.882***
##                    (0.606)   (1.075)   (1.113) 
##                                                
## -----------------------------------------------
## Observations         400       400       400   
## Log Likelihood    -243.028  -240.172  -229.259 
## Akaike Inf. Crit.  490.056   486.344   470.517 
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
library(modelsummary)
modelsummary(modelog,
             exponentiate = T, 
             statistic = 'conf.int',
             title = "Regresión Logísticas (Coeficientes Exponenciados)",
             stars = TRUE,
             output = "kableExtra")
Regresión Logísticas (Coeficientes Exponenciados)
 (1)
(Intercept) 0.008***
[0.001, 0.064]
letras 1.002*
[1.000, 1.004]
ciencias 1.008*
[1.002, 1.015]
prestigio.L 3.285***
[1.897, 5.879]
prestigio.Q 1.261
[0.764, 2.057]
prestigio.C 0.906
[0.595, 1.369]
Num.Obs. 400
AIC 470.5
BIC 494.5
Log.Lik. -229.259
F 7.228
RMSE 0.44
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001