Cargue de los datos

library(haven)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the Stata data set for the workshop.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs as-is on a computer where this exact file location exists.
DatosT1 <- read_dta("C:/Users/leonardo/Dropbox/MPH/TALLER_1/DatosT1.dta")

# View() opens a GUI data viewer, so only call it when a user is present
# (it is useless, and may fail, when the script is knitted or run in batch).
if (interactive()) {
  View(DatosT1)
}

Etiquetado de factores

# Value labels for every coded categorical column. The position of each label
# is the numeric code stored in the data (first label = code 1, and so on).
# NOTE(review): "No casado/No U. LIbre" and "Autoreceto" look like typos, but
# they are kept verbatim because they are the level names used downstream.
etiquetas_factores <- list(
  sexo = c("Hombre", "Mujer"),
  estado_civil = c(
    "Casado/U.Libre", "No casado/No U. LIbre",
    "Separado/Viudo", "Soltero"
  ),
  regimen_salud = c(
    "Contributivo", "Especial", "Subsidiado", "No sabe", "No afiliado"
  ),
  estado_salud = c("Muy bueno", "Bueno", "Regular", "Malo"),
  consulta_al_medico = c("Si", "No"),
  calidad_SSalud = c("Bueno", "Regular", "Malo"),
  problema_salud = c("Si", "No"),
  solucion_problema_salud = c(
    "IPS", "Particular", "Farmacia", "Tegua",
    "Alternativo", "Casero", "Autoreceto", "Nada"
  ),
  razon_noAfiliado = c(
    "Falta dinero", "Mucho tramite", "No interes",
    "No sabe", "No vinculo laboral", "En tramite"
  )
)

# Convert each coded column into a factor, in the order listed above.
for (columna in names(etiquetas_factores)) {
  rotulos <- etiquetas_factores[[columna]]
  DatosT1[[columna]] <- factor(
    DatosT1[[columna]],
    levels = seq_along(rotulos),
    labels = rotulos
  )
}

Creación de la tabla 1

library(tableone)                               
# Print the column names as a ready-to-paste character vector.
dput(colnames(DatosT1))
## c("edad", "sexo", "estado_civil", "regimen_salud", "estado_salud", 
## "consulta_al_medico", "calidad_SSalud", "problema_salud", "solucion_problema_salud", 
## "tiempo_espera", "razon_noAfiliado", "ingreso", "contador", "salario"
## )
# Variables to describe in Table 1 (every column except the stratifier `sexo`).
# The warning printed below this call shows that `tiempo_espera` is dropped by
# CreateTableOne() because of its class.
variables <- c(
  "edad",
  "estado_civil",
  "regimen_salud",
  "estado_salud",
  "consulta_al_medico",
  "calidad_SSalud",
  "problema_salud",
  "solucion_problema_salud",
  "tiempo_espera",
  "razon_noAfiliado",
  "ingreso",
  "contador",
  "salario"
)

# Subset of `variables` that must be summarised as categorical.
factores <- c(
  "estado_civil",
  "regimen_salud",
  "estado_salud",
  "consulta_al_medico",
  "calidad_SSalud",
  "problema_salud",
  "solucion_problema_salud",
  "razon_noAfiliado"
)

# Descriptive table stratified by sex.
tablaunosexo <- CreateTableOne(
  vars = variables,
  data = DatosT1,
  strata = "sexo",
  factorVars = factores
)
## Warning in CreateTableOne(vars = variables, data = DatosT1, strata = "sexo", : Dropping variable(s) tiempo_espera  due to unsupported class.
# Show the stratified table with left-justified columns.
print(
  tablaunosexo,
  justify = "left"
)
##                              Stratified by sexo
##                               Hombre                  Mujer                  
##   n                                10608                   13395             
##   edad (mean (SD))                 41.69 (18.28)           43.92 (18.75)     
##   estado_civil (%)                                                           
##      Casado/U.Libre                 4609 (43.4)             5062 (37.8)      
##      No casado/No U. LIbre          2487 (23.4)             2512 (18.8)      
##      Separado/Viudo                  222 ( 2.1)             1427 (10.7)      
##      Soltero                        3290 (31.0)             4394 (32.8)      
##   regimen_salud (%)                                                          
##      Contributivo                   4652 (43.9)             3695 (27.6)      
##      Especial                       1584 (14.9)             4225 (31.5)      
##      Subsidiado                     3230 (30.4)             4422 (33.0)      
##      No sabe                          77 ( 0.7)               50 ( 0.4)      
##      No afiliado                    1065 (10.0)             1003 ( 7.5)      
##   estado_salud (%)                                                           
##      Muy bueno                      1480 (14.0)             1632 (12.2)      
##      Bueno                          7084 (66.8)             8463 (63.2)      
##      Regular                        1812 (17.1)             3012 (22.5)      
##      Malo                            232 ( 2.2)              288 ( 2.2)      
##   consulta_al_medico = No (%)       3870 (36.5)             4399 (32.8)      
##   calidad_SSalud (%)                                                         
##      Bueno                          1270 (12.0)             1940 (14.5)      
##      Regular                        7993 (75.3)            10111 (75.5)      
##      Malo                           1345 (12.7)             1344 (10.0)      
##   problema_salud = No (%)           9563 (90.1)            11613 (86.7)      
##   solucion_problema_salud (%)                                                
##      IPS                             661 (63.3)             1108 (62.2)      
##      Particular                       43 ( 4.1)              102 ( 5.7)      
##      Farmacia                         64 ( 6.1)              105 ( 5.9)      
##      Tegua                           118 (11.3)              234 (13.1)      
##      Alternativo                      39 ( 3.7)               64 ( 3.6)      
##      Casero                            5 ( 0.5)                7 ( 0.4)      
##      Autoreceto                       50 ( 4.8)               63 ( 3.5)      
##      Nada                             65 ( 6.2)               99 ( 5.6)      
##   razon_noAfiliado (%)                                                       
##      Falta dinero                     38 ( 3.6)               49 ( 4.9)      
##      Mucho tramite                   110 (10.3)              123 (12.3)      
##      No interes                      378 (35.5)              400 (39.9)      
##      No sabe                         219 (20.6)              201 (20.0)      
##      No vinculo laboral               84 ( 7.9)               74 ( 7.4)      
##      En tramite                      236 (22.2)              156 (15.6)      
##   ingreso (mean (SD))         2419113.56 (1175013.47) 2424520.33 (1177695.29)
##   contador (mean (SD))              1.00 (0.00)             1.00 (0.00)      
##   salario (%)                                                                
##      > 5 SMMLV                      1554 (14.6)             1915 (14.3)      
##      0 a 1 SMMLV                     875 ( 8.2)             1121 ( 8.4)      
##      1 y 2 SMMLV                    1424 (13.4)             1785 (13.3)      
##      2 y 3 SMMLV                    2380 (22.4)             2875 (21.5)      
##      3 y 4 SMMLV                    2505 (23.6)             3211 (24.0)      
##      4 y 5 SMMLV                    1870 (17.6)             2488 (18.6)      
##                              Stratified by sexo
##                               p      test
##   n                                      
##   edad (mean (SD))            <0.001     
##   estado_civil (%)            <0.001     
##      Casado/U.Libre                      
##      No casado/No U. LIbre               
##      Separado/Viudo                      
##      Soltero                             
##   regimen_salud (%)           <0.001     
##      Contributivo                        
##      Especial                            
##      Subsidiado                          
##      No sabe                             
##      No afiliado                         
##   estado_salud (%)            <0.001     
##      Muy bueno                           
##      Bueno                               
##      Regular                             
##      Malo                                
##   consulta_al_medico = No (%) <0.001     
##   calidad_SSalud (%)          <0.001     
##      Bueno                               
##      Regular                             
##      Malo                                
##   problema_salud = No (%)     <0.001     
##   solucion_problema_salud (%)  0.289     
##      IPS                                 
##      Particular                          
##      Farmacia                            
##      Tegua                               
##      Alternativo                         
##      Casero                              
##      Autoreceto                          
##      Nada                                
##   razon_noAfiliado (%)         0.002     
##      Falta dinero                        
##      Mucho tramite                       
##      No interes                          
##      No sabe                             
##      No vinculo laboral                  
##      En tramite                          
##   ingreso (mean (SD))          0.724     
##   contador (mean (SD))         NaN       
##   salario (%)                  0.265     
##      > 5 SMMLV                           
##      0 a 1 SMMLV                         
##      1 y 2 SMMLV                         
##      2 y 3 SMMLV                         
##      3 y 4 SMMLV                         
##      4 y 5 SMMLV

Resumen de toda la población

# Whole-population summary: size, share of each sex, and mean/SD of age and
# mean income. Missing values are excluded consistently from every statistic
# (the original dropped them for the means but not for the SD), and TRUE is
# spelled out because `T` is an ordinary, reassignable variable.
DatosT1 %>%
  summarise(
    Numero = n(),
    # Shares are taken over all rows; rows with missing sex count in the
    # denominator only.
    Mujeres = sum(sexo == "Mujer", na.rm = TRUE) / n(),
    Hombres = sum(sexo == "Hombre", na.rm = TRUE) / n(),
    Edad = mean(edad, na.rm = TRUE),
    EdadSD = sd(edad, na.rm = TRUE),
    Ingreso = mean(ingreso, na.rm = TRUE)
  )
## # A tibble: 1 x 6
##   Numero Mujeres Hombres  Edad EdadSD  Ingreso
##    <int>   <dbl>   <dbl> <dbl>  <dbl>    <dbl>
## 1  24003   0.558   0.442  42.9   18.6 2422131.
# Relative frequency of each health regime over all rows of the data set.
table(DatosT1[["regimen_salud"]]) / nrow(DatosT1)
## 
## Contributivo     Especial   Subsidiado      No sabe  No afiliado 
##  0.347748198  0.242011415  0.318793484  0.005291005  0.086155897

Estimación puntual de la edad

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(fitdistrplus)
## Loading required package: survival
## Loading required package: npsurv
## Loading required package: lsei
# Age as a plain numeric vector for the distribution-fitting helpers.
datoedad <- as.numeric(DatosT1$edad)

# Empirical distribution plots with histogram and empirical density enabled.
# TRUE is spelled out because `T` can be reassigned.
plotdist(datoedad, histo = TRUE, demp = TRUE)

# Summary statistics of age (min, max, median, mean, sd, skewness, kurtosis).
descdist(datoedad)

## summary statistics
## ------
## min:  16   max:  99 
## median:  41 
## mean:  42.93592 
## estimated sd:  18.57588 
## estimated skewness:  0.4283122 
## estimated kurtosis:  2.288087
# Box plot of age.
boxplot(datoedad)

# Fit a normal distribution to age; prints the estimated mean and sd with
# their standard errors.
fitdistr(x = DatosT1[["edad"]], densfun = "Normal")
##       mean           sd     
##   42.93592468   18.57549719 
##  ( 0.11989683) ( 0.08477986)

Pruebas de hipótesis

# F test comparing the variance of age between the two sexes.
var.test(
  edad ~ sexo,
  data = DatosT1
)
## 
##  F test to compare two variances
## 
## data:  edad by sexo
## F = 0.95118, num df = 10607, denom df = 13394, p-value = 0.006535
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9175519 0.9861085
## sample estimates:
## ratio of variances 
##          0.9511764
# Welch two-sample t test of mean age by sex (unequal variances, as suggested
# by the F test above).
# `paired` is no longer passed: FALSE is already the default, and the formula
# method in newer R releases (4.4.0 onwards, per the R NEWS) stops with an
# error whenever `paired` is supplied.
t.test(edad ~ sexo,
  data = DatosT1,
  alternative = "two.sided",
  conf.level = 0.95,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  edad by sexo
## t = -9.3073, df = 22998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.707607 -1.765577
## sample estimates:
## mean in group Hombre  mean in group Mujer 
##             41.68778             43.92437
# F test comparing the variance of income between the two sexes.
var.test(
  ingreso ~ sexo,
  data = DatosT1
)
## 
##  F test to compare two variances
## 
## data:  ingreso by sexo
## F = 0.99545, num df = 10607, denom df = 13394, p-value = 0.8047
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9602612 1.0320089
## sample estimates:
## ratio of variances 
##          0.9954508
# Pooled-variance two-sample t test of mean income by sex (the F test above
# did not reject equal variances).
# `paired` is no longer passed: FALSE is already the default, and the formula
# method in newer R releases (4.4.0 onwards, per the R NEWS) stops with an
# error whenever `paired` is supplied.
t.test(ingreso ~ sexo,
  data = DatosT1,
  conf.level = 0.95,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  ingreso by sexo
## t = -0.35359, df = 24001, p-value = 0.7237
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -35378.42  24564.87
## sample estimates:
## mean in group Hombre  mean in group Mujer 
##              2419114              2424520

Proporciones

# Counts needed for the proportion tests: overall and per-sex totals, plus the
# number of people in the "Contributivo" regime overall and within each sex.
# Rows where a comparison is missing are not counted.
DatosT1 %>%
  summarise(
    Total = n(),
    Contributivo = sum(regimen_salud == "Contributivo", na.rm = TRUE),
    Proporcion = Contributivo / Total,
    Total_hombres = sum(sexo == "Hombre", na.rm = TRUE),
    Total_mujeres = sum(sexo == "Mujer", na.rm = TRUE),
    Contri.Hombres = sum(
      regimen_salud == "Contributivo" & sexo == "Hombre",
      na.rm = TRUE
    ),
    Contri.mujeres = sum(
      regimen_salud == "Contributivo" & sexo == "Mujer",
      na.rm = TRUE
    )
  )
## # A tibble: 1 x 7
##   Total Contributivo Proporcion Total_hombres Total_mujeres Contri.Hombres
##   <int>        <int>      <dbl>         <int>         <int>          <int>
## 1 24003         8347      0.348         10608         13395           4652
## # ... with 1 more variable: Contri.mujeres <int>

Utilizando una prueba Z para una proporción

# One-sample Z test for the proportion in the contributive regime (H0: p = 0.5).
num_contributivo <- 8347 # "Contributivo" count from the summary table above
num_total <- 24003
# Use the exact observed proportion instead of a value rounded to three
# decimals, so the statistic is not distorted by rounding.
p_1 <- num_contributivo / num_total
p_0 <- 0.5
zprop_con <- (p_1 - p_0) / sqrt(p_0 * ((1 - p_0) / num_total))
zprop_con
## [1] -47.17644
# Two-sided p-value taken from the computed statistic rather than a pasted
# literal, matching the two-sided alternative used with prop.test() below.
2 * pnorm(-abs(zprop_con))
## [1] 0

Hombres y mujeres

# Two-sample Z test: proportion in the contributive regime, men vs. women.
# Counts come from the summary table above; everything else is derived from
# them so no rounded proportion or repeated literal enters the statistic.
con_hombres <- 4652
con_mujeres <- 3695
num_hombres <- 10608
num_mujeres <- 13395
prop_hombres_con <- con_hombres / num_hombres
prop_mujere_con <- con_mujeres / num_mujeres
# Pooled proportion under H0 (equal proportions) and its standard error.
p_pool <- (con_hombres + con_mujeres) / (num_hombres + num_mujeres)
SE_p <- sqrt((p_pool * (1 - p_pool)) * ((1 / num_hombres) + (1 / num_mujeres)))
dif_prop <- prop_hombres_con - prop_mujere_con
z_prop <- dif_prop / SE_p
z_prop
## [1] 26.28271

Utilizando prop.test

# One-sample proportion test (H0: p = 0.5) for the contributive regime.
# Literal counts are kept inline because prop.test() echoes the argument
# expressions in the "data:" line of its printed result.
prop.test(
  x = 8347,
  n = 24003,
  p = 0.5,
  alternative = "two.sided"
)
## 
##  1-sample proportions test with continuity correction
## 
## data:  8347 out of 24003, null probability 0.5
## X-squared = 2225, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.3417273 0.3538180
## sample estimates:
##         p 
## 0.3477482
# Two-sample test of equal proportions (men vs. women, contributive regime).
# Literal counts are kept inline because prop.test() echoes the argument
# expressions in the "data:" line of its printed result.
prop.test(
  x = c(4652, 3695),
  n = c(10608, 13395),
  conf.level = 0.95,
  alternative = "two.sided"
)
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(4652, 3695) out of c(10608, 13395)
## X-squared = 690.06, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.1505016 0.1748739
## sample estimates:
##    prop 1    prop 2 
## 0.4385370 0.2758492

Intervalo de confianza

# 95% confidence interval for mean income using the t distribution.
# The sample size is taken from the data (non-missing incomes) instead of a
# hard-coded 24003, so the interval stays correct if the data set changes.
n_ingreso <- sum(!is.na(DatosT1$ingreso))
prom_ingreso <- mean(DatosT1$ingreso, na.rm = TRUE)
prom_ingreso
## [1] 2422131
# Critical value with n - 1 degrees of freedom.
t_critico <- qt(0.975, df = n_ingreso - 1)
t_critico
## [1] 1.960063
s_ingreso <- sd(DatosT1$ingreso, na.rm = TRUE)
# Standard error of the mean.
SE_ing <- s_ingreso / sqrt(n_ingreso)

# Lower and upper limits of the interval.
izquierdo <- prom_ingreso - (t_critico * SE_ing)
izquierdo
## [1] 2407247
derecho <- prom_ingreso + (t_critico * SE_ing)
derecho
## [1] 2437015