Creamos las mismas variables usadas en la clase anterior
# Make the course folder the working directory so the relative CSV names
# used in this script resolve.
# NOTE(review): absolute, machine-specific path; the later read of
# "SUN2012.csv" also depends on this working directory.
setwd(dir = "/Users/mau/Dropbox/Cursos mau/Maestria CEI")
# Read BaseIP3.csv into `data`; the first row supplies the column names.
data <- read.csv(file = "BaseIP3.csv", header = TRUE)
# Frequency checks of the raw answer columns before recoding them.
# The `##` lines after each call are the recorded console output.

# p7_1: its "Si" category is what lcompras is built from below.
table(data$p7_1)
##
## NS/NC No Si
## 41 8779 2180
# p7_2: its "Si" category is what lpaseo is built from below.
table(data$p7_2)
##
## NS/NC No Si
## 64 8728 2208
# p7_3: its "Si" category is what lvive is built from below.
table(data$p7_3)
##
## NS/NC No Si
## 45 10289 666
# p8: the two "Si, ..." categories are what delito is built from below.
table(data$p8)
##
## Ambos NC
## 209 17
## NS No
## 24 8526
## Si, alguien de esta vivienda Si, yo he sido victima
## 945 1279
# Weighted indicator columns: a row contributes its weight `ponde` when the
# answer matches and 0 otherwise, so summing by group gives weighted counts.
data$lcompras <- ifelse(data$p7_1 == "Si", data$ponde, 0)
data$lpaseo <- ifelse(data$p7_2 == "Si", data$ponde, 0)
data$lvive <- ifelse(data$p7_3 == "Si", data$ponde, 0)
# delito: either of the two "Si, ..." answers of p8 carries the weight.
# NOTE(review): the "Ambos" answer of p8 is not counted here; confirm that
# this exclusion is intended.
data$delito <- ifelse(
  data$p8 == "Si, alguien de esta vivienda" |
    data$p8 == "Si, yo he sido victima",
  data$ponde,
  0
)
# Every row contributes its weight to the denominator.
data$total <- data$ponde
# Sum the weighted columns within each edo/muni combination.
data2 <- aggregate(
  cbind(lcompras, lpaseo, lvive, delito, total) ~ edo + muni,
  data = data,
  FUN = sum
)
# Single numeric key per municipality: edo * 1000 + muni.
data2$clave <- data2$edo * 1000 + data2$muni
# Attach the columns of SUN2012.csv to the municipal aggregates and express
# the weighted counts as percentages of each municipality's total weight.
dataSUN <- read.csv("SUN2012.csv", header = TRUE)
# Left join on the municipality key (clave in data2, clave1 in dataSUN):
# every row of data2 is kept, unmatched rows get NA in the dataSUN columns.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
data4 <- merge(
  data2, dataSUN,
  by.x = "clave", by.y = "clave1",
  all.x = TRUE, all.y = FALSE
)
# Rows left without a tipo after the join are coded 0.
data4$tipo <- ifelse(is.na(data4$tipo), 0, data4$tipo)
# Percentages over the total weight of each municipality.
data4$plpaseo <- 100 * (data4$lpaseo / data4$total)
data4$plcompras <- 100 * (data4$lcompras / data4$total)
data4$plvive <- 100 * (data4$lvive / data4$total)
data4$tdelito <- 100 * (data4$delito / data4$total)
library(ggplot2)
# Scatter plot of tdelito (x) against plcompras (y), then their correlation.
p <- ggplot(data = data4, mapping = aes(x = tdelito, y = plcompras))
p +
  geom_point() +
  labs(
    x = "Tasa de delitos",
    y = "% ha cambiado lugar de compras",
    title = "Diagrama de dispersion 1"
  )
with(data4, cor(plcompras, tdelito))
## [1] 0.265463
# Same plot and correlation for plpaseo.
p <- ggplot(data = data4, mapping = aes(x = tdelito, y = plpaseo))
p +
  geom_point() +
  labs(
    x = "Tasa de delitos",
    y = "% ha cambiado lugar de paseo",
    title = "Diagrama de dispersion 1"
  )
with(data4, cor(plpaseo, tdelito))
## [1] 0.3287581
# Same plot and correlation for plvive.
p <- ggplot(data = data4, mapping = aes(x = tdelito, y = plvive))
p +
  geom_point() +
  labs(
    x = "Tasa de delitos",
    y = "% ha cambiado lugar de residencia",
    title = "Diagrama de dispersion 1"
  )
with(data4, cor(plvive, tdelito))
## [1] 0.2052642
# The same three scatter plots, with the points coloured by tipo.
# NOTE(review): tipo is stored as a number, so the colour scale is
# presumably continuous; wrap it in factor() if discrete groups are wanted.
p <- ggplot(
  data = data4,
  mapping = aes(x = tdelito, y = plcompras, colour = tipo)
)
p +
  geom_point() +
  labs(
    x = "Tasa de delitos",
    y = "% ha cambiado lugar de compras",
    title = "Diagrama de dispersion 1"
  )
p <- ggplot(
  data = data4,
  mapping = aes(x = tdelito, y = plpaseo, colour = tipo)
)
p +
  geom_point() +
  labs(
    x = "Tasa de delitos",
    y = "% ha cambiado lugar de paseo",
    title = "Diagrama de dispersion 1"
  )
p <- ggplot(
  data = data4,
  mapping = aes(x = tdelito, y = plvive, colour = tipo)
)
p +
  geom_point() +
  labs(
    x = "Tasa de delitos",
    y = "% ha cambiado lugar de residencia",
    title = "Diagrama de dispersion 1"
  )
# Welch two-sample t tests. Each call passes two columns as the two samples,
# so it tests whether the mean of the tipo column equals the mean of the
# other column. The `##` lines are the recorded console output.
# NOTE(review): tipo is a code and the second column is a percentage, so the
# two means are on different scales. If the goal is to compare a percentage
# between tipo groups, a grouped call such as
# t.test(plcompras ~ tipo, data = data4) would be needed (it requires exactly
# two groups) -- confirm the intent before interpreting these results.
t.test(data4$tipo,data4$plcompras)
##
## Welch Two Sample t-test
##
## data: data4$tipo and data4$plcompras
## t = -19.181, df = 347.55, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -17.69547 -14.40397
## sample estimates:
## mean of x mean of y
## 0.2614943 16.3112139
# Same comparison against plpaseo.
t.test(data4$tipo,data4$plpaseo)
##
## Welch Two Sample t-test
##
## data: data4$tipo and data4$plpaseo
## t = -20.345, df = 347.55, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -18.71932 -15.41900
## sample estimates:
## mean of x mean of y
## 0.2614943 17.3306509
# Same comparison against plvive.
t.test(data4$tipo,data4$plvive)
##
## Welch Two Sample t-test
##
## data: data4$tipo and data4$plvive
## t = -10.338, df = 348.59, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.064571 -4.125781
## sample estimates:
## mean of x mean of y
## 0.2614943 5.3566702
# Same comparison against tdelito.
t.test(data4$tipo,data4$tdelito)
##
## Welch Two Sample t-test
##
## data: data4$tipo and data4$tdelito
## t = -22.214, df = 347.57, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -19.94582 -16.70116
## sample estimates:
## mean of x mean of y
## 0.2614943 18.5849854
# Size class of each municipality from its total weight:
#   0: total <= 0            1: 0 < total < 50000
#   2: 50000 <= total < 100000
#   3: 100000 <= total < 500000
#   4: total >= 500000
# findInterval() counts how many cut points are <= total; adding (total > 0)
# separates non-positive totals (class 0) from the first positive class.
data4$pobmun <- as.numeric(
  findInterval(data4$total, c(50000, 100000, 500000)) + (data4$total > 0)
)
# Number of municipalities in each size class.
table(data4$pobmun)
##
## 1 2 3 4
## 90 65 132 61
Vamos a seguir una estrategia conocida como “Modelos Anidados”; esta aproximación nos sirve para probar hipótesis alternativas.
Consiste en ajustar primero un modelo solamente con nuestra variable dependiente y la variable independiente de interés.
Después incorporamos variables “control”, una a la vez, y comparamos cada modelo con el anterior.
# Baseline model: plcompras explained by tdelito alone.
modelo1 <- lm(formula = plcompras ~ tdelito, data = data4)
summary(modelo1)
##
## Call:
## lm(formula = plcompras ~ tdelito, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.531 -11.306 -2.746 7.962 58.743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.30632 1.26772 8.919 < 2e-16 ***
## tdelito 0.26930 0.05258 5.122 5.04e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.07 on 346 degrees of freedom
## Multiple R-squared: 0.07047, Adjusted R-squared: 0.06778
## F-statistic: 26.23 on 1 and 346 DF, p-value: 5.04e-07
# Second nested model: the baseline plus plvive as a control.
modelo2 <- lm(formula = plcompras ~ tdelito + plvive, data = data4)
summary(modelo2)
##
## Call:
## lm(formula = plcompras ~ tdelito + plvive, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.390 -9.009 -1.811 7.432 52.194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.00920 1.16324 7.745 1.07e-13 ***
## tdelito 0.17785 0.04816 3.693 0.000258 ***
## plvive 0.74610 0.08066 9.250 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.5 on 345 degrees of freedom
## Multiple R-squared: 0.2552, Adjusted R-squared: 0.2509
## F-statistic: 59.1 on 2 and 345 DF, p-value: < 2.2e-16
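Para comparar dos modelos anidados usamos una prueba ANOVA (prueba F), que evalúa si las variables añadidas reducen significativamente la suma de cuadrados residual.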
# F test between the nested fits modelo1 and modelo2 (adds plvive).
anova(modelo1, modelo2)
## Analysis of Variance Table
##
## Model 1: plcompras ~ tdelito
## Model 2: plcompras ~ tdelito + plvive
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 346 78529
## 2 345 62922 1 15606 85.567 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Third nested model: adds tipo, entered as a single numeric term.
# NOTE(review): if tipo encodes more than two categories, factor(tipo) may be
# what is intended -- confirm against the SUN2012 coding.
modelo3 <- lm(formula = plcompras ~ tdelito + plvive + tipo, data = data4)
summary(modelo3)
##
## Call:
## lm(formula = plcompras ~ tdelito + plvive + tipo, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.749 -8.956 -1.595 7.386 53.146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.53502 1.18353 7.211 3.55e-12 ***
## tdelito 0.15277 0.04964 3.077 0.00226 **
## plvive 0.75926 0.08061 9.419 < 2e-16 ***
## tipo 3.32638 1.69887 1.958 0.05104 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.45 on 344 degrees of freedom
## Multiple R-squared: 0.2634, Adjusted R-squared: 0.257
## F-statistic: 41 on 3 and 344 DF, p-value: < 2.2e-16
# F test between the nested fits modelo2 and modelo3 (adds tipo).
anova(modelo2, modelo3)
## Analysis of Variance Table
##
## Model 1: plcompras ~ tdelito + plvive
## Model 2: plcompras ~ tdelito + plvive + tipo
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 345 62922
## 2 344 62229 1 693.52 3.8338 0.05104 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fourth nested model: adds the size class pobmun as a factor, so each class
# other than the first gets its own coefficient.
modelo4 <- lm(
  formula = plcompras ~ tdelito + plvive + tipo + factor(pobmun),
  data = data4
)
summary(modelo4)
##
## Call:
## lm(formula = plcompras ~ tdelito + plvive + tipo + factor(pobmun),
## data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.245 -8.429 -2.939 6.723 51.092
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.75932 1.53731 3.746 0.000211 ***
## tdelito 0.12130 0.05066 2.394 0.017200 *
## plvive 0.75846 0.07990 9.492 < 2e-16 ***
## tipo 3.23907 1.73551 1.866 0.062850 .
## factor(pobmun)2 5.95890 2.18422 2.728 0.006699 **
## factor(pobmun)3 5.42920 1.91890 2.829 0.004941 **
## factor(pobmun)4 1.22884 2.33167 0.527 0.598522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.27 on 341 degrees of freedom
## Multiple R-squared: 0.2896, Adjusted R-squared: 0.2771
## F-statistic: 23.17 on 6 and 341 DF, p-value: < 2.2e-16
# F test between the nested fits modelo3 and modelo4 (adds factor(pobmun)).
anova(modelo3, modelo4)
## Analysis of Variance Table
##
## Model 1: plcompras ~ tdelito + plvive + tipo
## Model 2: plcompras ~ tdelito + plvive + tipo + factor(pobmun)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 344 62229
## 2 341 60017 3 2211.5 4.1884 0.006255 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1