Paola Nieto (20150967)
Descargue y limpie las siguientes bases de datos: https://en.wikipedia.org/wiki/Democracy_Index *Use la tabla “Democracy Index by country 2019”
library(rio)
# Address of the Economic Freedom workbook (raw file hosted on GitHub).
linkToData <- "https://github.com/jcgcjuan/Magallanes-Clases-/raw/master/Data%20EconoFreedom.xlsx"
# Read the remote workbook into a data frame with rio's importer.
data1 <- rio::import(file = linkToData)
library(htmltab)
# Wikipedia article that hosts the Democracy Index tables.
linkPage <- "https://en.wikipedia.org/wiki/Democracy_Index"
# XPath selecting the second table under the div inside the element whose
# id is "mw-content-text".
linkPath <- '//*[@id="mw-content-text"]/div/table[2]'
# Scrape the table found at that XPath into a data frame.
data2 <- htmltab::htmltab(doc = linkPage, which = linkPath)
De la primera base de datos extraiga solo las variables Country, Property Rights, Judical Effectiveness y Government Integrity. Estas serán las variables independientes.
# Remove columns 1, 3-6 and 10-18 by position; whatever remains is kept.
data1[, c(1, 3:6, 10:18)] <- NULL
# Strip leading/trailing horizontal and vertical whitespace from every cell.
# trimws() returns character vectors, so all columns are text after this step.
data1[, ] <- lapply(
  data1[, ],
  function(column) trimws(column, whitespace = "[\\h\\v]")
)
# Rename the country column to "Country", the key used later in merge().
names(data1)[names(data1) == "Country Name"] <- "Country"
# Convert every column except the first back to numeric; cells that cannot be
# parsed as numbers become NA (with a coercion warning).
data1[, -c(1)] <- lapply(data1[, -c(1)], as.numeric)
## Warning in lapply(data1[, -c(1)], as.numeric): NAs introduced by coercion
## Warning in lapply(data1[, -c(1)], as.numeric): NAs introduced by coercion
## Warning in lapply(data1[, -c(1)], as.numeric): NAs introduced by coercion
# Print the structure of the cleaned data1 to check column names and types.
str(data1)
## 'data.frame': 186 obs. of 4 variables:
## $ Country : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ Property Rights : num 19.6 54.8 31.6 35.9 47.8 57.2 79.1 84.2 59.1 42.2 ...
## $ Judical Effectiveness: num 29.6 30.6 36.2 26.6 44.5 46.3 86.5 71.3 53.1 46.9 ...
## $ Government Integrity : num 25.2 40.4 28.9 20.5 33.5 38.6 79.9 77.4 44.7 43.7 ...
De la segunda base de datos extraiga las variables Country y Score. Esta última será su variable dependiente.
library(readr)
library(magrittr)
library(stringr)
# Remove columns 1 and 4-11 of the scraped table by position.
data2[, c(1, 4:11)] <- NULL
# Clean the headers: keep only the text before the first ">>" separator and
# delete every whitespace character from it.
names(data2) <- gsub(
  "\\s",
  "",
  str_split(names(data2), ">>", simplify = TRUE)[, 1]
)
# Strip leading/trailing horizontal and vertical whitespace from every cell.
data2[, ] <- lapply(
  data2[, ],
  function(column) trimws(column, whitespace = "[\\h\\v]")
)
# The score was scraped as text; convert it to numeric.
data2$Score <- as.numeric(data2$Score)
# Print the structure of the cleaned table.
str(data2)
## 'data.frame': 167 obs. of 2 variables:
## $ Country: chr "Norway" "Iceland" "Sweden" "New Zealand" ...
## $ Score : num 9.87 9.58 9.39 9.26 9.25 9.24 9.22 9.22 9.09 9.03 ...
# Inner join of both tables on the country name: only countries present in
# both data1 and data2 are kept (merge() default).
data3 <- merge(x = data1, y = data2, by = "Country")
# Print the structure of the merged table.
str(data3)
## 'data.frame': 157 obs. of 5 variables:
## $ Country : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ Property Rights : num 19.6 54.8 31.6 35.9 47.8 57.2 79.1 84.2 59.1 63.5 ...
## $ Judical Effectiveness: num 29.6 30.6 36.2 26.6 44.5 46.3 86.5 71.3 53.1 50.7 ...
## $ Government Integrity : num 25.2 40.4 28.9 20.5 33.5 38.6 79.9 77.4 44.7 53.6 ...
## $ Score : num 2.85 5.89 4.01 3.72 7.02 5.54 9.09 8.29 2.75 2.55 ...
# Give the five columns short names, assigned by position.
data3 <- setNames(data3, c("Country", "PRORI", "JUEF", "GOIN", "Score"))
# REGRESSION ----
# Nested model specifications: each one adds a predictor to the previous one.
modelo1 <- Score ~ PRORI
modelo2 <- Score ~ PRORI + JUEF
modelo3 <- Score ~ PRORI + JUEF + GOIN
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
# Fit the first specification (Score on PRORI) by ordinary least squares.
reg1 <- lm(formula = modelo1, data = data3)
# Print the fit as a text table with the intercept listed first.
stargazer(
  reg1,
  type = "text",
  intercept.bottom = FALSE
)
##
## ===============================================
## Dependent variable:
## ---------------------------
## Score
## -----------------------------------------------
## Constant 1.320***
## (0.350)
##
## PRORI 0.079***
## (0.006)
##
## -----------------------------------------------
## Observations 157
## R2 0.514
## Adjusted R2 0.511
## Residual Std. Error 1.554 (df = 155)
## F Statistic 163.796*** (df = 1; 155)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Fit the second specification (Score on PRORI and JUEF) by ordinary least
# squares.
reg2 <- lm(formula = modelo2, data = data3)
# Print the fit as a text table with the intercept listed first.
stargazer(
  reg2,
  type = "text",
  intercept.bottom = FALSE
)
##
## ===============================================
## Dependent variable:
## ---------------------------
## Score
## -----------------------------------------------
## Constant 1.454***
## (0.347)
##
## PRORI 0.105***
## (0.012)
##
## JUEF -0.034***
## (0.013)
##
## -----------------------------------------------
## Observations 157
## R2 0.535
## Adjusted R2 0.529
## Residual Std. Error 1.525 (df = 154)
## F Statistic 88.586*** (df = 2; 154)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Check whether the residual error drops significantly from reg1 to reg2.
tanova <- anova(reg1, reg2)
# Print the ANOVA table as-is (no summary statistics) in text format.
stargazer(
  tanova,
  type = "text",
  summary = FALSE,
  title = "Table de Análisis de Varianza"
)
##
## Table de Análisis de Varianza
## ===========================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -------------------------------------------
## 1 155 374.434
## 2 154 358.116 1 16.318 7.017 0.009
## -------------------------------------------
#El H0 de anova es que los modelos (o medias) no difieren
#Pr(>F) es 0.009 (menor que 0.05), por lo que el H0 se rechaza
# Fit the third specification (Score on PRORI, JUEF and GOIN) by ordinary
# least squares.
reg3 <- lm(formula = modelo3, data = data3)
# Print the fit as a text table with the intercept listed first.
stargazer(
  reg3,
  type = "text",
  intercept.bottom = FALSE
)
##
## ===============================================
## Dependent variable:
## ---------------------------
## Score
## -----------------------------------------------
## Constant 1.648***
## (0.372)
##
## PRORI 0.096***
## (0.013)
##
## JUEF -0.047***
## (0.016)
##
## GOIN 0.022
## (0.015)
##
## -----------------------------------------------
## Observations 157
## R2 0.541
## Adjusted R2 0.532
## Residual Std. Error 1.520 (df = 153)
## F Statistic 60.119*** (df = 3; 153)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Compare the second and third models with an ANOVA F test.
tanova2 <- anova(reg2, reg3)
# Print the ANOVA table as-is (no summary statistics) in text format.
stargazer(
  tanova2,
  type = "text",
  summary = FALSE,
  title = "Table de Análisis de Varianza 2"
)
##
## Table de Análisis de Varianza 2
## ===========================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -------------------------------------------
## 1 154 358.116
## 2 153 353.460 1 4.656 2.015 0.158
## -------------------------------------------
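La comparación de modelos usando la tabla de análisis de varianza propone como hipótesis nula que los modelos no difieren. Entre el primer y el segundo modelo la hipótesis nula se rechaza (p = 0.009), por lo que añadir JUEF reduce significativamente el error. Entre el segundo y el tercer modelo no se puede rechazar (p = 0.158): aunque el tercer modelo tiene el R2 más alto (0.541 frente a 0.535), el R2 nunca disminuye al añadir predictores, y GOIN no resulta significativo. Por ello, el mejor modelo es el segundo.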