Cargar Datos y Paquetes Necesarios

# ggplot2 provides the plotting functions used for the confidence-interval
# charts later in the script.
library(ggplot2)

# Load the saved R objects from the working directory. `lapop` and `dfm` are
# used later on, so presumably each file provides the object of the same
# name -- TODO confirm.
load("lapop.rda")
load("dfm.rda")
# List the column names of `dfm`.
names(dfm)

## [1] "edad" "asp"

Obtener los Parámetros de la Variable Edad

# Number of cases in the age variable, used below as the population.
length(dfm$edad)

## [1] 131872

# Mean age over all cases.
mean(dfm$edad)

## [1] 38.5846

# Standard deviation of age (sd() uses the n - 1 denominator).
sd(dfm$edad)

## [1] 15.51605

# Histogram of the age distribution.
hist(dfm$edad)

Definimos la función para generar muestras

# Sampling distribution of the mean.
#
# Draws repeated samples of size `n` (with replacement) from the population
# vector `v` and, for every sample, computes the mean, the standard
# deviation, the standard error and an approximate 95% confidence interval
# (mean +/- 2 standard errors).
#
# Usage: muestra(v, n, r)
#   v  vector treated as the population
#   n  size of each sample
#   r  number of samples to draw
#
# Returns a data.frame with one row per sample and the columns:
#   m        sample mean
#   desv     sample standard deviation
#   tam      sample size
#   err.std  standard error of the mean (desv / sqrt(tam))
#   linf     lower limit of the interval (m - 2 * err.std)
#   lsup     upper limit of the interval (m + 2 * err.std)
#   xbar     mean of `v` (same value in every row)
#   error    sampling error (m - xbar)
#   int      TRUE when the interval [linf, lsup] contains xbar
muestra <- function(v, n, r) {
  # The original repeat/break loop always drew at least one sample and
  # stopped at the first count >= r; keep that contract.
  reps <- max(1, ceiling(r))

  # Loop invariants: the population mean and size never change.
  xbar <- mean(v)
  pop_size <- length(v)

  # Preallocate instead of growing the vectors with c() on every iteration.
  m <- numeric(reps)
  desv <- numeric(reps)
  tam <- numeric(reps)

  for (i in seq_len(reps)) {
    # Sample by index: sample(v, ...) would draw from 1:v when `v` holds a
    # single number. For longer vectors this yields exactly the same draws.
    sam <- v[sample.int(pop_size, n, replace = TRUE)]
    m[i] <- mean(sam)
    desv[i] <- sd(sam)
    tam[i] <- length(sam)
  }

  # Everything below is vectorised over the samples.
  err.std <- desv / sqrt(tam)
  linf <- m - 2 * err.std
  lsup <- m + 2 * err.std

  data.frame(
    m, desv, tam, err.std, linf, lsup,
    xbar = xbar,
    error = m - xbar,
    int = linf <= xbar & lsup >= xbar
  )
}

Pedimos una muestra de 500 casos de la variable edad

muestra(dfm$edad, 500, 1)

##       m     desv tam   err.std     linf     lsup    xbar      error  int
## 1 37.97 15.68455 500 0.7014344 36.56713 39.37287 38.5846 -0.6146048 TRUE

Pedimos 10 muestreos repetidos de 500 casos de la variable edad

muestra(dfm$edad, 500, 10)

##         m     desv tam   err.std     linf     lsup    xbar      error
## 1  41.064 16.35049 500 0.7312162 39.60157 42.52643 38.5846  2.4793952
## 2  37.744 15.57921 500 0.6967236 36.35055 39.13745 38.5846 -0.8406048
## 3  37.534 14.40708 500 0.6443042 36.24539 38.82261 38.5846 -1.0506048
## 4  37.914 15.48801 500 0.6926447 36.52871 39.29929 38.5846 -0.6706048
## 5  38.826 15.50356 500 0.6933404 37.43932 40.21268 38.5846  0.2413952
## 6  39.034 15.22157 500 0.6807294 37.67254 40.39546 38.5846  0.4493952
## 7  38.438 15.27537 500 0.6831352 37.07173 39.80427 38.5846 -0.1466048
## 8  38.000 15.64178 500 0.6995217 36.60096 39.39904 38.5846 -0.5846048
## 9  38.862 15.11661 500 0.6760355 37.50993 40.21407 38.5846  0.2773952
## 10 37.718 15.38701 500 0.6881279 36.34174 39.09426 38.5846 -0.8666048
##      int
## 1  FALSE
## 2   TRUE
## 3   TRUE
## 4   TRUE
## 5   TRUE
## 6   TRUE
## 7   TRUE
## 8   TRUE
## 9   TRUE
## 10  TRUE

¿Qué tal si pedimos 100 muestras?

muestras100 <- muestra(dfm$edad, 500, 100)

head(muestras100, 50) # las 50 primeras muestras

##         m     desv tam   err.std     linf     lsup    xbar       error
## 1  38.452 15.57758 500 0.6966506 37.05870 39.84530 38.5846 -0.13260477
## 2  38.680 15.09614 500 0.6751198 37.32976 40.03024 38.5846  0.09539523
## 3  39.286 15.44050 500 0.6905201 37.90496 40.66704 38.5846  0.70139523
## 4  39.274 15.97791 500 0.7145537 37.84489 40.70311 38.5846  0.68939523
## 5  39.000 15.50732 500 0.6935084 37.61298 40.38702 38.5846  0.41539523
## 6  38.732 15.39305 500 0.6883982 37.35520 40.10880 38.5846  0.14739523
## 7  38.680 15.74565 500 0.7041668 37.27167 40.08833 38.5846  0.09539523
## 8  37.548 15.08359 500 0.6745587 36.19888 38.89712 38.5846 -1.03660477
## 9  37.970 15.22500 500 0.6808828 36.60823 39.33177 38.5846 -0.61460477
## 10 38.886 16.13685 500 0.7216617 37.44268 40.32932 38.5846  0.30139523
## 11 39.360 15.92676 500 0.7122664 37.93547 40.78453 38.5846  0.77539523
## 12 38.360 14.95452 500 0.6687865 37.02243 39.69757 38.5846 -0.22460477
## 13 38.966 15.79201 500 0.7062402 37.55352 40.37848 38.5846  0.38139523
## 14 39.098 15.88840 500 0.7105509 37.67690 40.51910 38.5846  0.51339523
## 15 39.080 16.44453 500 0.7354216 37.60916 40.55084 38.5846  0.49539523
## 16 38.762 15.48693 500 0.6925966 37.37681 40.14719 38.5846  0.17739523
## 17 37.730 15.76230 500 0.7049113 36.32018 39.13982 38.5846 -0.85460477
## 18 38.080 14.75972 500 0.6600747 36.75985 39.40015 38.5846 -0.50460477
## 19 38.952 15.41834 500 0.6895290 37.57294 40.33106 38.5846  0.36739523
## 20 39.350 15.57616 500 0.6965870 37.95683 40.74317 38.5846  0.76539523
## 21 40.296 15.55185 500 0.6955000 38.90500 41.68700 38.5846  1.71139523
## 22 38.712 15.87353 500 0.7098859 37.29223 40.13177 38.5846  0.12739523
## 23 39.920 15.92510 500 0.7121921 38.49562 41.34438 38.5846  1.33539523
## 24 39.344 15.93001 500 0.7124117 37.91918 40.76882 38.5846  0.75939523
## 25 38.040 15.45692 500 0.6912543 36.65749 39.42251 38.5846 -0.54460477
## 26 38.568 14.84622 500 0.6639431 37.24011 39.89589 38.5846 -0.01660477
## 27 37.810 15.46999 500 0.6918389 36.42632 39.19368 38.5846 -0.77460477
## 28 38.110 14.86243 500 0.6646679 36.78066 39.43934 38.5846 -0.47460477
## 29 37.586 15.29216 500 0.6838864 36.21823 38.95377 38.5846 -0.99860477
## 30 37.492 14.32286 500 0.6405376 36.21092 38.77308 38.5846 -1.09260477
## 31 38.926 15.45335 500 0.6910950 37.54381 40.30819 38.5846  0.34139523
## 32 38.318 14.91865 500 0.6671821 36.98364 39.65236 38.5846 -0.26660477
## 33 39.034 15.72741 500 0.7033513 37.62730 40.44070 38.5846  0.44939523
## 34 38.566 15.11498 500 0.6759624 37.21408 39.91792 38.5846 -0.01860477
## 35 37.564 15.28021 500 0.6833518 36.19730 38.93070 38.5846 -1.02060477
## 36 38.018 14.98194 500 0.6700128 36.67797 39.35803 38.5846 -0.56660477
## 37 39.608 16.22754 500 0.7257176 38.15656 41.05944 38.5846  1.02339523
## 38 38.934 15.90432 500 0.7112630 37.51147 40.35653 38.5846  0.34939523
## 39 38.740 14.69000 500 0.6569566 37.42609 40.05391 38.5846  0.15539523
## 40 39.212 16.45858 500 0.7360500 37.73990 40.68410 38.5846  0.62739523
## 41 37.868 14.98933 500 0.6703431 36.52731 39.20869 38.5846 -0.71660477
## 42 39.380 15.45177 500 0.6910240 37.99795 40.76205 38.5846  0.79539523
## 43 37.916 15.48821 500 0.6926539 36.53069 39.30131 38.5846 -0.66860477
## 44 38.320 15.71597 500 0.7028394 36.91432 39.72568 38.5846 -0.26460477
## 45 38.616 15.20457 500 0.6799690 37.25606 39.97594 38.5846  0.03139523
## 46 38.982 16.47898 500 0.7369624 37.50808 40.45592 38.5846  0.39739523
## 47 38.840 15.87622 500 0.7100064 37.41999 40.26001 38.5846  0.25539523
## 48 36.912 14.75343 500 0.6597933 35.59241 38.23159 38.5846 -1.67260477
## 49 39.588 15.60774 500 0.6979992 38.19200 40.98400 38.5846  1.00339523
## 50 39.406 16.58503 500 0.7417052 37.92259 40.88941 38.5846  0.82139523
##      int
## 1   TRUE
## 2   TRUE
## 3   TRUE
## 4   TRUE
## 5   TRUE
## 6   TRUE
## 7   TRUE
## 8   TRUE
## 9   TRUE
## 10  TRUE
## 11  TRUE
## 12  TRUE
## 13  TRUE
## 14  TRUE
## 15  TRUE
## 16  TRUE
## 17  TRUE
## 18  TRUE
## 19  TRUE
## 20  TRUE
## 21 FALSE
## 22  TRUE
## 23  TRUE
## 24  TRUE
## 25  TRUE
## 26  TRUE
## 27  TRUE
## 28  TRUE
## 29  TRUE
## 30  TRUE
## 31  TRUE
## 32  TRUE
## 33  TRUE
## 34  TRUE
## 35  TRUE
## 36  TRUE
## 37  TRUE
## 38  TRUE
## 39  TRUE
## 40  TRUE
## 41  TRUE
## 42  TRUE
## 43  TRUE
## 44  TRUE
## 45  TRUE
## 46  TRUE
## 47  TRUE
## 48 FALSE
## 49  TRUE
## 50  TRUE

hist(muestras100$m)

¿Y si pedimos 1000 muestras?

muestras1000 <- muestra(dfm$edad, 500, 1000)

hist(muestras1000$m)

¿10000 muestras?

muestras10000 <- muestra(dfm$edad, 500, 10000)

hist(muestras10000$m)

¿Cuántas muestras tienen un intervalo de confianza que no incluye al parámetro?

table(muestras100$int)

## 
## FALSE  TRUE 
##     2    98

table(muestras1000$int)

## 
## FALSE  TRUE 
##    51   949

table(muestras10000$int)

## 
## FALSE  TRUE 
##   466  9534

Intervalos de Confianza con summarySE y Gráficos con ggplot

Datos

library(Rmisc)

## Loading required package: lattice
## Loading required package: plyr

# Build a subset with the Peru 2012 survey data: keep the rows of `lapop`
# where `pais` is "Perú" and `year` is "2012".

lap.pe12 <- subset(lapop, pais=="Perú" & year=="2012")

Estadísticos descriptivos del índice de apoyo al sistema político:

# Mean of the political system support index (asp), ignoring missing values.
mean(lap.pe12$asp, na.rm = TRUE)

## [1] 49.448

# Standard deviation of asp, ignoring missing values.
sd(lap.pe12$asp, na.rm = TRUE)

## [1] 18.22805

# Number of non-missing asp values.
sum(!is.na(lap.pe12$asp))

## [1] 1407

## [1] 1407

Cálculo “manual” del error estándar de la media:

# Standard error of the mean: sd divided by the square root of the number of
# non-missing values.
with(lap.pe12, sd(asp, na.rm = TRUE) / sqrt(sum(!is.na(asp))))

## [1] 0.4859518

Usando la función summarySE, con intervalo de confianza al 95%

# Summary of asp over the whole Peru 2012 subset (the printed result has the
# columns N, asp, sd, se and ci). TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summarySE(lap.pe12, measurevar = "asp", na.rm = TRUE)

##    .id    N    asp       sd        se        ci
## 1 <NA> 1407 49.448 18.22805 0.4859518 0.9532686

# For a different confidence level (here 99%), pass conf.interval.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.

summarySE(lap.pe12, measurevar = "asp", na.rm = TRUE, conf.interval = 0.99)

##    .id    N    asp       sd        se      ci
## 1 <NA> 1407 49.448 18.22805 0.4859518 1.25343

Por grupos de casos:

# ASP by sex (grouping variable q1).
# TRUE is spelled out because `T` is an ordinary, reassignable variable.

summarySE(lap.pe12, measurevar = "asp", groupvars = c("q1"), na.rm = TRUE)

##       q1   N      asp       sd        se       ci
## 1 Hombre 710 49.16432 18.65539 0.7001242 1.374565
## 2  Mujer 697 49.73697 17.79094 0.6738799 1.323081

# ASP by size of locality (grouping variable tamano).
# TRUE is spelled out because `T` is an ordinary, reassignable variable.

summarySE(lap.pe12, measurevar = "asp", groupvars = c("tamano"), na.rm = TRUE)

##                                  tamano   N      asp       sd        se
## 1 Capital nacional (área metropolitana) 472 48.25565 17.97861 0.8275328
## 2                         Ciudad grande 320 49.19792 18.19863 1.0173345
## 3                        Ciudad mediana 157 51.78344 20.14735 1.6079337
## 4                        Ciudad pequeña 136 48.01471 17.50428 1.5009795
## 5                            Área rural 322 50.91097 17.83152 0.9937116
##         ci
## 1 1.626113
## 2 2.001533
## 3 3.176131
## 4 2.968476
## 5 1.955010

# ASP by sex and size of locality (one row per q1 x tamano combination).
# TRUE is spelled out because `T` is an ordinary, reassignable variable.

summarySE(lap.pe12, measurevar = "asp", groupvars = c("q1", "tamano"),
          na.rm = TRUE)

##        q1                                tamano   N      asp       sd
## 1  Hombre Capital nacional (área metropolitana) 236 48.60169 18.97301
## 2  Hombre                         Ciudad grande 163 48.69121 17.83338
## 3  Hombre                        Ciudad mediana  78 48.58974 21.07856
## 4  Hombre                        Ciudad pequeña  68 49.06863 18.01828
## 5  Hombre                            Área rural 165 50.74747 18.16572
## 6   Mujer Capital nacional (área metropolitana) 236 47.90960 16.95940
## 7   Mujer                         Ciudad grande 157 49.72399 18.61272
## 8   Mujer                        Ciudad mediana  79 54.93671 18.78701
## 9   Mujer                        Ciudad pequeña  68 46.96078 17.04287
## 10  Mujer                            Área rural 157 51.08280 17.52985
##          se       ci
## 1  1.235037 2.433160
## 2  1.396818 2.758319
## 3  2.386677 4.752481
## 4  2.185038 4.361354
## 5  1.414199 2.792386
## 6  1.103963 2.174928
## 7  1.485457 2.934204
## 8  2.113704 4.208061
## 9  2.066752 4.125254
## 10 1.399035 2.763496

Graficamos el intervalo de confianza al 95%:

Para ASP por Tamaño de Localidad

# Per-locality-size summary of asp. The `ci` column is used below as the
# half-width of the error bars. TRUE is spelled out because `T` is an
# ordinary, reassignable variable.
asp.pe12 <- summarySE(lap.pe12, measurevar = "asp", groupvars = c("tamano"),
                      na.rm = TRUE)

# One point per locality size with error bars from asp - ci to asp + ci;
# coord_flip() puts the categories on the vertical axis.
ggplot(asp.pe12, aes(x = tamano, y = asp)) +
  geom_point(size = 2) +
  ylim(35, 65) +
  geom_errorbar(aes(ymin = asp - ci, ymax = asp + ci), width = 0.2) +
  xlab("Tamaño de la localidad") +
  ylab("ASP") +
  ggtitle("LAPOP Perú 2012: Promedio del Índice de Apoyo al Sistema\n Político (ASP) según tamaño de la localidad e Intervalo\n de confianza al 95%") +
  theme_bw() +
  coord_flip()

ASP por países en América Latina:

# Keep only the rows of `lapop` where year is "2012".
lapop12 <- subset(lapop, year == "2012")

# Per-country summary of asp. The `ci` column is used below as the half-width
# of the error bars. TRUE is spelled out because `T` is an ordinary,
# reassignable variable.
asp.la12 <- summarySE(lapop12, measurevar = "asp", groupvars = c("pais"),
                      na.rm = TRUE)

# One point per country with error bars from asp - ci to asp + ci;
# coord_flip() puts the countries on the vertical axis.
ggplot(asp.la12, aes(x = pais, y = asp)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = asp - ci, ymax = asp + ci), width = 0.2) +
  xlab("Pais") +
  ylab("ASP") +
  ggtitle("LAPOP 2012: Promedio del Índice de Apoyo al Sistema\n Político (ASP) según País e Intervalo\n de confianza al 95%") +
  theme_bw() +
  coord_flip()

# To show the countries ordered from lowest to highest ASP, reorder the
# `pais` factor by `asp`.

ggplot(asp.la12, aes(x = reorder(pais, asp), y = asp)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = asp - ci, ymax = asp + ci), width = 0.2) +
  labs(
    x = "Pais",
    y = "ASP",
    title = "LAPOP 2012: Promedio del Índice de Apoyo al Sistema\n Político (ASP) según País e Intervalo\n de confianza al 95%"
  ) +
  theme_bw() +
  coord_flip()

# To show the countries ordered from highest to lowest ASP, reorder the
# `pais` factor by the negated `asp`.

ggplot(asp.la12, aes(x = reorder(pais, -asp), y = asp)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = asp - ci, ymax = asp + ci), width = 0.2) +
  labs(
    x = "Pais",
    y = "ASP",
    title = "LAPOP 2012: Promedio del Índice de Apoyo al Sistema\n Político (ASP) según País e Intervalo\n de confianza al 95%"
  ) +
  theme_bw() +
  coord_flip()

Error Estándar e Intervalos de Confianza

David Sulmont - Pontificia Universidad Católica del Perú

23 de septiembre de 2015