Cargue de los datos
library(haven)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the Stata data set into `DatosT1` and open it in the data viewer.
# NOTE(review): the path is absolute and user-specific; confirm whether a
# project-relative path should be used instead.
ruta_datos <- "C:/Users/leonardo/Dropbox/MPH/TALLER_1/DatosT1.dta"
DatosT1 <- read_dta(ruta_datos)
View(DatosT1)
Etiquetado de factores
# Recode the numeric survey codes into labelled factors, one column at a time.
# The position of each label corresponds to the numeric code in `levels`.
# NOTE(review): "No casado/No U. LIbre" and "Autoreceto" look like typos in
# the labels; they are kept verbatim because they appear in the rendered table.
DatosT1 <- DatosT1 %>%
  mutate(
    sexo = factor(sexo, levels = 1:2, labels = c("Hombre", "Mujer")),
    estado_civil = factor(
      estado_civil,
      levels = 1:4,
      labels = c(
        "Casado/U.Libre", "No casado/No U. LIbre",
        "Separado/Viudo", "Soltero"
      )
    ),
    regimen_salud = factor(
      regimen_salud,
      levels = 1:5,
      labels = c(
        "Contributivo", "Especial", "Subsidiado", "No sabe", "No afiliado"
      )
    ),
    estado_salud = factor(
      estado_salud,
      levels = 1:4,
      labels = c("Muy bueno", "Bueno", "Regular", "Malo")
    ),
    consulta_al_medico = factor(
      consulta_al_medico,
      levels = 1:2,
      labels = c("Si", "No")
    ),
    calidad_SSalud = factor(
      calidad_SSalud,
      levels = 1:3,
      labels = c("Bueno", "Regular", "Malo")
    ),
    problema_salud = factor(
      problema_salud,
      levels = 1:2,
      labels = c("Si", "No")
    ),
    solucion_problema_salud = factor(
      solucion_problema_salud,
      levels = 1:8,
      labels = c(
        "IPS", "Particular", "Farmacia", "Tegua",
        "Alternativo", "Casero", "Autoreceto", "Nada"
      )
    ),
    razon_noAfiliado = factor(
      razon_noAfiliado,
      levels = 1:6,
      labels = c(
        "Falta dinero", "Mucho tramite", "No interes",
        "No sabe", "No vinculo laboral", "En tramite"
      )
    )
  )
Creación de la tabla 1
library(tableone)
# Print the column names as an R vector literal (handy for copy-pasting
# into the variable lists used for the table).
nombres_columnas <- names(DatosT1)
dput(nombres_columnas)
## c("edad", "sexo", "estado_civil", "regimen_salud", "estado_salud",
## "consulta_al_medico", "calidad_SSalud", "problema_salud", "solucion_problema_salud",
## "tiempo_espera", "razon_noAfiliado", "ingreso", "contador", "salario"
## )
# Columns summarised in Table 1 (every column except the stratifier `sexo`).
# NOTE(review): the rendered output shows `tiempo_espera` being dropped
# (unsupported class) and `contador` with SD 0 / p = NaN; confirm whether
# both should stay in this list.
variables <- c(
  "edad",
  "estado_civil",
  "regimen_salud",
  "estado_salud",
  "consulta_al_medico",
  "calidad_SSalud",
  "problema_salud",
  "solucion_problema_salud",
  "tiempo_espera",
  "razon_noAfiliado",
  "ingreso",
  "contador",
  "salario"
)

# Subset of `variables` that must be treated as categorical.
factores <- c(
  "estado_civil",
  "regimen_salud",
  "estado_salud",
  "consulta_al_medico",
  "calidad_SSalud",
  "problema_salud",
  "solucion_problema_salud",
  "razon_noAfiliado"
)

# Build the descriptive table stratified by sex.
tablaunosexo <- CreateTableOne(
  vars = variables,
  data = DatosT1,
  strata = "sexo",
  factorVars = factores
)
## Warning in CreateTableOne(vars = variables, data = DatosT1, strata = "sexo", : Dropping variable(s) tiempo_espera due to unsupported class.
# Show the stratified table with left-justified columns.
print(tablaunosexo, justify = "left")
## Stratified by sexo
## Hombre Mujer
## n 10608 13395
## edad (mean (SD)) 41.69 (18.28) 43.92 (18.75)
## estado_civil (%)
## Casado/U.Libre 4609 (43.4) 5062 (37.8)
## No casado/No U. LIbre 2487 (23.4) 2512 (18.8)
## Separado/Viudo 222 ( 2.1) 1427 (10.7)
## Soltero 3290 (31.0) 4394 (32.8)
## regimen_salud (%)
## Contributivo 4652 (43.9) 3695 (27.6)
## Especial 1584 (14.9) 4225 (31.5)
## Subsidiado 3230 (30.4) 4422 (33.0)
## No sabe 77 ( 0.7) 50 ( 0.4)
## No afiliado 1065 (10.0) 1003 ( 7.5)
## estado_salud (%)
## Muy bueno 1480 (14.0) 1632 (12.2)
## Bueno 7084 (66.8) 8463 (63.2)
## Regular 1812 (17.1) 3012 (22.5)
## Malo 232 ( 2.2) 288 ( 2.2)
## consulta_al_medico = No (%) 3870 (36.5) 4399 (32.8)
## calidad_SSalud (%)
## Bueno 1270 (12.0) 1940 (14.5)
## Regular 7993 (75.3) 10111 (75.5)
## Malo 1345 (12.7) 1344 (10.0)
## problema_salud = No (%) 9563 (90.1) 11613 (86.7)
## solucion_problema_salud (%)
## IPS 661 (63.3) 1108 (62.2)
## Particular 43 ( 4.1) 102 ( 5.7)
## Farmacia 64 ( 6.1) 105 ( 5.9)
## Tegua 118 (11.3) 234 (13.1)
## Alternativo 39 ( 3.7) 64 ( 3.6)
## Casero 5 ( 0.5) 7 ( 0.4)
## Autoreceto 50 ( 4.8) 63 ( 3.5)
## Nada 65 ( 6.2) 99 ( 5.6)
## razon_noAfiliado (%)
## Falta dinero 38 ( 3.6) 49 ( 4.9)
## Mucho tramite 110 (10.3) 123 (12.3)
## No interes 378 (35.5) 400 (39.9)
## No sabe 219 (20.6) 201 (20.0)
## No vinculo laboral 84 ( 7.9) 74 ( 7.4)
## En tramite 236 (22.2) 156 (15.6)
## ingreso (mean (SD)) 2419113.56 (1175013.47) 2424520.33 (1177695.29)
## contador (mean (SD)) 1.00 (0.00) 1.00 (0.00)
## salario (%)
## > 5 SMMLV 1554 (14.6) 1915 (14.3)
## 0 a 1 SMMLV 875 ( 8.2) 1121 ( 8.4)
## 1 y 2 SMMLV 1424 (13.4) 1785 (13.3)
## 2 y 3 SMMLV 2380 (22.4) 2875 (21.5)
## 3 y 4 SMMLV 2505 (23.6) 3211 (24.0)
## 4 y 5 SMMLV 1870 (17.6) 2488 (18.6)
## Stratified by sexo
## p test
## n
## edad (mean (SD)) <0.001
## estado_civil (%) <0.001
## Casado/U.Libre
## No casado/No U. LIbre
## Separado/Viudo
## Soltero
## regimen_salud (%) <0.001
## Contributivo
## Especial
## Subsidiado
## No sabe
## No afiliado
## estado_salud (%) <0.001
## Muy bueno
## Bueno
## Regular
## Malo
## consulta_al_medico = No (%) <0.001
## calidad_SSalud (%) <0.001
## Bueno
## Regular
## Malo
## problema_salud = No (%) <0.001
## solucion_problema_salud (%) 0.289
## IPS
## Particular
## Farmacia
## Tegua
## Alternativo
## Casero
## Autoreceto
## Nada
## razon_noAfiliado (%) 0.002
## Falta dinero
## Mucho tramite
## No interes
## No sabe
## No vinculo laboral
## En tramite
## ingreso (mean (SD)) 0.724
## contador (mean (SD)) NaN
## salario (%) 0.265
## > 5 SMMLV
## 0 a 1 SMMLV
## 1 y 2 SMMLV
## 2 y 3 SMMLV
## 3 y 4 SMMLV
## 4 y 5 SMMLV
Resumen de toda la población
# Whole-sample summary: number of rows, share of women and men, mean and
# standard deviation of age, and mean income.
DatosT1 %>%
  summarise(
    Numero = n(),
    # Shares are taken over all rows; missing `sexo` values count in the
    # denominator only, exactly as with length(which(...)) / n().
    Mujeres = sum(sexo == "Mujer", na.rm = TRUE) / n(),
    Hombres = sum(sexo == "Hombre", na.rm = TRUE) / n(),
    Edad = mean(edad, na.rm = TRUE),
    # na.rm added so the SD uses the same observations as the mean above;
    # without it a single missing age would turn EdadSD into NA.
    EdadSD = sd(edad, na.rm = TRUE),
    Ingreso = mean(ingreso, na.rm = TRUE)
  )
## # A tibble: 1 x 6
## Numero Mujeres Hombres Edad EdadSD Ingreso
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 24003 0.558 0.442 42.9 18.6 2422131.
# Relative frequency of each health-insurance scheme over all rows.
table(DatosT1$regimen_salud) / nrow(DatosT1)
##
## Contributivo Especial Subsidiado No sabe No afiliado
## 0.347748198 0.242011415 0.318793484 0.005291005 0.086155897
Estimación puntual de la edad
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(fitdistrplus)
## Loading required package: survival
## Loading required package: npsurv
## Loading required package: lsei
# Plain numeric copy of the age column for the distribution diagnostics.
datoedad <- as.numeric(DatosT1$edad)
# Distribution plots of age; TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
plotdist(datoedad, histo = TRUE, demp = TRUE)

# Descriptive statistics of age (min, max, median, mean, sd, skewness,
# kurtosis), printed below.
descdist(datoedad)

## summary statistics
## ------
## min: 16 max: 99
## median: 41
## mean: 42.93592
## estimated sd: 18.57588
## estimated skewness: 0.4283122
## estimated kurtosis: 2.288087
# Box plot of age.
boxplot(datoedad)

# Maximum-likelihood fit of a normal distribution to age. Uses the numeric
# vector `datoedad` (same data as the plots above) and the documented
# lower-case distribution name.
fitdistr(datoedad, densfun = "normal")
## mean sd
## 42.93592468 18.57549719
## ( 0.11989683) ( 0.08477986)
Pruebas de hipótesis
# F test for equality of the age variances of men and women; its result
# decides the `var.equal` setting of the t-test on age.
var.test(edad ~ sexo, data = DatosT1)
##
## F test to compare two variances
##
## data: edad by sexo
## F = 0.95118, num df = 10607, denom df = 13394, p-value = 0.006535
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.9175519 0.9861085
## sample estimates:
## ratio of variances
## 0.9511764
# Two-sided Welch t-test of mean age by sex (unequal variances, following
# the F test above). `paired` is no longer passed: the formula interface
# is unpaired by default and recent R versions reject the argument there.
t.test(
  edad ~ sexo,
  data = DatosT1,
  alternative = "two.sided",
  conf.level = 0.95,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: edad by sexo
## t = -9.3073, df = 22998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.707607 -1.765577
## sample estimates:
## mean in group Hombre mean in group Mujer
## 41.68778 43.92437
# F test for equality of the income variances of men and women; its result
# decides the `var.equal` setting of the t-test on income.
var.test(ingreso ~ sexo, data = DatosT1)
##
## F test to compare two variances
##
## data: ingreso by sexo
## F = 0.99545, num df = 10607, denom df = 13394, p-value = 0.8047
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.9602612 1.0320089
## sample estimates:
## ratio of variances
## 0.9954508
# Pooled-variance t-test of mean income by sex (equal variances, following
# the F test above). `paired` is no longer passed: the formula interface
# is unpaired by default and recent R versions reject the argument there.
t.test(
  ingreso ~ sexo,
  data = DatosT1,
  conf.level = 0.95,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: ingreso by sexo
## t = -0.35359, df = 24001, p-value = 0.7237
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -35378.42 24564.87
## sample estimates:
## mean in group Hombre mean in group Mujer
## 2419114 2424520
Proporciones
# Counts needed for the proportion tests: total rows, people in the
# "Contributivo" scheme (overall and by sex) and the size of each sex group.
DatosT1 %>%
  summarise(
    Total = length(regimen_salud),
    Contributivo = sum(regimen_salud == "Contributivo", na.rm = TRUE),
    Proporcion = Contributivo / Total,
    Total_hombres = sum(sexo == "Hombre", na.rm = TRUE),
    Total_mujeres = sum(sexo == "Mujer", na.rm = TRUE),
    Contri.Hombres = sum(
      regimen_salud == "Contributivo" & sexo == "Hombre",
      na.rm = TRUE
    ),
    Contri.mujeres = sum(
      regimen_salud == "Contributivo" & sexo == "Mujer",
      na.rm = TRUE
    )
  )
## # A tibble: 1 x 7
## Total Contributivo Proporcion Total_hombres Total_mujeres Contri.Hombres
## <int> <int> <dbl> <int> <int> <int>
## 1 24003 8347 0.348 10608 13395 4652
## # ... with 1 more variable: Contri.mujeres <int>
Utilizando una prueba Z para una proporción
# One-sample Z test of the "Contributivo" share against a null value of 0.5.
# Counts are the ones reported by the summary above.
num_contributivo <- 8347
num_total <- 24003
# Exact sample proportion; the rounded 0.348 used before shifted the
# statistic in the second decimal.
p_1 <- num_contributivo / num_total
p_0 <- 0.5
zprop_con <- (p_1 - p_0) / sqrt(p_0 * (1 - p_0) / num_total)
zprop_con
## [1] -47.17644
# Two-sided p-value computed from the statistic itself rather than from a
# retyped number (two-sided, like the prop.test() call further down).
2 * pnorm(-abs(zprop_con))
## [1] 0
Hombres y mujeres
# Two-sample Z test (pooled standard error) for the difference in the
# "Contributivo" share between men and women. Counts are the ones reported
# by the summary above.
num_hombres <- 10608
num_mujeres <- 13395
con_hombres <- 4652
con_mujeres <- 3695
# Exact group proportions instead of values rounded to four decimals.
prop_hombres_con <- con_hombres / num_hombres
prop_mujere_con <- con_mujeres / num_mujeres
# Pooled proportion and standard error, now built from the named counts
# (previously the group sizes were defined but the literals were retyped).
p_pool <- (con_hombres + con_mujeres) / (num_hombres + num_mujeres)
SE_p <- sqrt(p_pool * (1 - p_pool) * (1 / num_hombres + 1 / num_mujeres))
dif_prop <- prop_hombres_con - prop_mujere_con
z_prop <- dif_prop / SE_p
z_prop
## [1] 26.28271
Utilizando prop.test
# One-sample proportion test: 8347 "Contributivo" out of 24003 against a
# null proportion of 0.5. Literal counts are kept because they are echoed
# in the printed `data:` line.
prop.test(
  x = 8347,
  n = 24003,
  p = 0.5,
  alternative = "two.sided"
)
##
## 1-sample proportions test with continuity correction
##
## data: 8347 out of 24003, null probability 0.5
## X-squared = 2225, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.3417273 0.3538180
## sample estimates:
## p
## 0.3477482
# Two-sample proportion test: "Contributivo" counts for men and women over
# their group sizes. Literal counts are kept because they are echoed in the
# printed `data:` line.
prop.test(
  x = c(4652, 3695),
  n = c(10608, 13395),
  alternative = "two.sided",
  conf.level = 0.95
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(4652, 3695) out of c(10608, 13395)
## X-squared = 690.06, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.1505016 0.1748739
## sample estimates:
## prop 1 prop 2
## 0.4385370 0.2758492
Intervalo de confianza
# 95% t-based confidence interval for mean income.
prom_ingreso <- mean(DatosT1$ingreso)
prom_ingreso
## [1] 2422131
# Sample size read from the data instead of the hard-coded 24003 / 24002,
# so the interval stays correct if the data set changes.
n_ingreso <- length(DatosT1$ingreso)
t_critico <- qt(0.975, df = n_ingreso - 1)
t_critico
## [1] 1.960063
s_ingreso <- sd(DatosT1$ingreso)
# Standard error of the mean.
SE_ing <- s_ingreso / sqrt(n_ingreso)
# Lower and upper limits of the interval.
izquierdo <- prom_ingreso - t_critico * SE_ing
izquierdo
## [1] 2407247
derecho <- prom_ingreso + t_critico * SE_ing
derecho
## [1] 2437015