# Librerías (cargué todas por si acaso desde el principio)
library(rio)
library(stargazer)
library(tidyverse)
library(stringr)
library(cluster)
library(factoextra)
library(fpc)
library(sf)
library(lmtest)
library(DescTools)
library(polycor)
library(ggcorrplot)
library(psych)
library(matrixcalc)
library(GPArotation)
library(lavaan)
library(BBmisc)
library(htmltab)
# EJERCICIO 1
# Scrape the freedom-indices table from Wikipedia, clean the country names,
# drop incomplete rows and encode each index as an ordered factor.
# NOTE(review): the XPath is tied to the page layout at the time of writing;
# confirm it still selects the intended table.
lib <- htmltab(
  "https://en.wikipedia.org/wiki/List_of_freedom_indices",
  "/html/body/div[3]/div[3]/div[5]/div[1]/table[2]"
)

# Country-name clean-up: strip trailing horizontal/vertical whitespace, then
# drop the first two characters of every name. Rows 1 and 2 (after sorting)
# get an extra, position-based trim.
# NOTE(review): the leading characters are presumably scraping residue from
# the flag icons, and the row-1/row-2 fixes depend on the current sort order
# of the table — verify against the live data.
lib <- arrange(lib, Country)
lib$Country <- trimws(lib$Country, which = "right", whitespace = "[\\h\\v]")
lib$Country <- substr(lib$Country, 3, 100)
lib[1, 1] <- substr(lib[1, 1], 5, 100)
lib[2, 1] <- substr(lib[2, 1], 3, 100)
lib <- arrange(lib, Country)

# The table marks missing values with the literal string "n/a"; turn those
# into NA and keep only complete rows.
lib[lib == "n/a"] <- NA
lib <- na.omit(lib)

# Encode each index as an ordered factor, levels listed from least to most
# free. `TRUE` is spelled out because `T` is an ordinary, reassignable
# variable.
lib$`Freedom in the World 2021` <- factor(
  lib$`Freedom in the World 2021`,
  levels = c("not free", "partly free", "free"),
  ordered = TRUE
)
lib$`2021 Index of Economic Freedom` <- factor(
  lib$`2021 Index of Economic Freedom`,
  levels = c(
    "repressed", "mostly unfree", "moderately free", "mostly free", "free"
  ),
  ordered = TRUE
)
lib$`2021 Press Freedom Index` <- factor(
  lib$`2021 Press Freedom Index`,
  levels = c(
    "very serious situation", "difficult situation", "noticeable problems",
    "satisfactory situation", "good situation"
  ),
  ordered = TRUE
)
lib$`2020 Democracy Index` <- factor(
  lib$`2020 Democracy Index`,
  levels = c(
    "authoritarian regime", "hybrid regime", "flawed democracy",
    "full democracy"
  ),
  ordered = TRUE
)
# Gower dissimilarity over every column of `lib` except the first one.
g.dist <- daisy(lib[, -1], metric = "gower")

# PAM with 5 clusters; the assignment is stored in lib$clustPT.
set.seed(123)
pam.resultado <- pam(g.dist, k = 5, cluster.only = FALSE)
lib$clustPT <- pam.resultado$cluster

# Three competing 3-cluster solutions on the same dissimilarity matrix:
# PAM, agglomerative (agnes) and divisive (diana) hierarchical clustering.
# Each assignment is stored as a new column of `lib`; the columns are added
# after `g.dist` was computed, so they do not feed back into the distances.
set.seed(123)
res.pam <- pam(g.dist, k = 3, cluster.only = FALSE)
lib$pam <- res.pam$cluster
res.agnes <- hcut(g.dist, k = 3, hc_func = "agnes")
lib$agnes <- res.agnes$cluster
res.diana <- hcut(g.dist, k = 3, hc_func = "diana")
lib$diana <- res.diana$cluster

# Silhouette plots for each solution; the `##` lines are the recorded output.
fviz_silhouette(res.pam)
## cluster size ave.sil.width
## 1 1 48 0.62
## 2 2 54 0.56
## 3 3 59 0.53
fviz_silhouette(res.agnes)
## cluster size ave.sil.width
## 1 1 47 0.64
## 2 2 57 0.53
## 3 3 57 0.58
## Warning in grid.newpage(): processing of the plot ran out of memory
fviz_silhouette(res.diana)
## cluster size ave.sil.width
## 1 1 95 0.18
## 2 2 9 0.67
## 3 3 57 0.45
## Warning in grid.newpage(): processing of the plot ran out of memory
# EJERCICIO 2
# Scrape the Democracy Index table from Wikipedia and keep the country name
# plus the five component scores (elect, gov, part, cult, libs).
# NOTE(review): the XPath and the 11 column names assume the page layout at
# the time of writing — confirm against the live table.
fecha <- htmltab(
  "https://en.wikipedia.org/wiki/Democracy_Index",
  "/html/body/div[3]/div[3]/div[5]/div[1]/table[10]"
)
names(fecha) <- c(
  "rank", "rank2", "Country", "type", "score", "score2",
  "elect", "gov", "part", "cult", "libs"
)

# Country-name clean-up: strip trailing whitespace, drop the first two
# characters of every name, then apply an extra position-based trim to the
# first two rows of the sorted table.
# NOTE(review): presumably removes scraping residue; the row-specific fixes
# depend on the current sort order — verify.
fecha <- arrange(fecha, Country)
fecha$Country <- trimws(fecha$Country, which = "right", whitespace = "[\\h\\v]")
fecha$Country <- substr(fecha$Country, 3, 100)
fecha$Country[1] <- substr(fecha$Country[1], 5, 100)
fecha$Country[2] <- substr(fecha$Country[2], 3, 100)
fecha <- arrange(fecha, Country)

# Convert the score columns (positions 5 to 11) to numeric, drop rows with
# any NA, and only then discard the rank, type and overall-score columns.
cols_numericas <- c("score", "score2", "elect", "gov", "part", "cult", "libs")
fecha[cols_numericas] <- lapply(fecha[cols_numericas], as.numeric)
fecha <- na.omit(fecha)
fecha <- fecha[, c("Country", "elect", "gov", "part", "cult", "libs")]
# Scrape the World Happiness Report table from Wikipedia, clean it, and join
# it to the Democracy Index components by country name.
# NOTE(review): the XPath assumes the page layout at the time of writing.
felicidad <- htmltab(
  "https://en.wikipedia.org/wiki/World_Happiness_Report",
  "/html/body/div[3]/div[3]/div[5]/div[1]/div[11]/table/tbody/tr[2]/td/table"
)

# Country-name clean-up: strip trailing whitespace, drop the first two
# characters of every name, then apply an extra position-based trim to the
# first two rows of the sorted table.
# NOTE(review): the row-specific fixes depend on the current sort order.
felicidad <- arrange(felicidad, `Country or region`)
felicidad$`Country or region` <- trimws(
  felicidad$`Country or region`,
  which = "right",
  whitespace = "[\\h\\v]"
)
felicidad$`Country or region` <- substr(felicidad$`Country or region`, 3, 100)
felicidad[1, 2] <- substr(felicidad[1, 2], 5, 100)
felicidad[2, 2] <- substr(felicidad[2, 2], 3, 100)
felicidad <- arrange(felicidad, `Country or region`)

# Every column except the second is converted to numeric; rows with any NA
# are dropped before columns 1 and 3 are discarded.
felicidad[-2] <- lapply(felicidad[-2], as.numeric)
felicidad <- na.omit(felicidad)
felicidad <- felicidad[, -c(1, 3)]

# Inner join on the country name (the key is named differently in each table).
d <- merge(
  fecha, felicidad,
  by.x = "Country",
  by.y = "Country or region"
)
# Gower dissimilarity over every column of `d` except the first (Country).
set.seed(123)
g.dist <- daisy(d[, -1], metric = "gower")

# Gap-statistic plots to choose the number of clusters (up to 10) for PAM,
# agnes and diana. `FALSE` is spelled out because `F` is reassignable.
fviz_nbclust(
  d[, -1], pam,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE
)
fviz_nbclust(
  d[, -1], hcut,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE,
  hc_func = "agnes"
)
fviz_nbclust(
  d[, -1], hcut,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE,
  hc_func = "diana"
)

# Fit the three clusterings and store each assignment as a column of `d`.
# NOTE(review): diana is cut at k = 3 while pam and agnes use k = 5 —
# presumably taken from the gap-statistic plots above; confirm it is intended.
set.seed(123)
res.pam <- pam(g.dist, k = 5, cluster.only = FALSE)
d$pam <- res.pam$cluster
res.agnes <- hcut(g.dist, k = 5, hc_func = "agnes")
d$agnes <- res.agnes$cluster
res.diana <- hcut(g.dist, k = 3, hc_func = "diana")
d$diana <- res.diana$cluster

# Silhouette plots for each solution; the `##` lines are the recorded output.
fviz_silhouette(res.pam)
## cluster size ave.sil.width
## 1 1 34 0.19
## 2 2 22 0.22
## 3 3 15 0.22
## 4 4 17 0.30
## 5 5 21 0.33
fviz_silhouette(res.agnes)
## cluster size ave.sil.width
## 1 1 38 0.21
## 2 2 22 0.23
## 3 3 11 0.31
## 4 4 16 0.31
## 5 5 22 0.32
fviz_silhouette(res.diana)
## cluster size ave.sil.width
## 1 1 33 0.35
## 2 2 51 0.17
## 3 3 25 0.34
# Exploratory factor analysis on the merged data.
# Column 1 (Country) and columns 13-15 (the pam/agnes/diana cluster labels
# added to `d` earlier) are excluded; the selection is computed once here
# instead of being repeated in every call.
d_fa <- d[, -c(1, 13:15)]

# Correlation matrix and sampling-adequacy check (KMO).
corMatrix <- polycor::hetcor(d_fa)$correlations
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.78
## MSA for each item =
## elect gov
## 0.73 0.87
## part cult
## 0.87 0.81
## libs GDP per capita
## 0.79 0.76
## Social support Healthy life expectancy
## 0.74 0.88
## Freedom to make life choices Generosity
## 0.60 0.63
## Perceptions of corruption
## 0.59

# Bartlett test: TRUE here would mean the p-value exceeds 0.05.
cortest.bartlett(corMatrix, n = nrow(d_fa))$p.value > 0.05
## [1] FALSE

# Check that the correlation matrix is not singular.
is.singular.matrix(corMatrix)
## [1] FALSE

# Parallel analysis to suggest the number of factors.
# NOTE(review): `correct` is passed as a logical, as in the original call;
# confirm against the psych documentation whether a numeric continuity
# correction was intended. Also note this step uses fm = "ML" while the fit
# below uses fm = "minres" — confirm the mismatch is deliberate.
fa.parallel(d_fa, fm = "ML", fa = "fa", correct = TRUE)
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA

# Three-factor solution with varimax rotation; only loadings of at least 0.5
# in absolute value are printed.
resfa <- fa(
  d_fa,
  nfactors = 3,
  cor = "mixed",
  rotate = "varimax",
  fm = "minres"
)
print(resfa$loadings, cutoff = 0.5)
##
## Loadings:
## MR1 MR3 MR2
## elect 0.943
## gov 0.738
## part 0.720
## cult 0.590
## libs 0.905
## GDP per capita 0.903
## Social support 0.740
## Healthy life expectancy 0.824
## Freedom to make life choices
## Generosity 0.502
## Perceptions of corruption 0.765
##
## MR1 MR3 MR2
## SS loadings 3.417 2.603 1.323
## Proportion Var 0.311 0.237 0.120
## Cumulative Var 0.311 0.547 0.667
fa.diagram(resfa)
# EJERCICIO 3
# Load the three indicator files from GitHub and merge them into `a`, with
# one row per country and the columns Pais, sal, edu and mil.
# NOTE(review): `sal` comes from column 63 of the spreadsheet after dropping
# its first three rows; which year that column holds is not visible here.
a <- import("https://github.com/Fabians099/ExamenFinal/blob/main/API_SH.XPD.CHEX.GD.ZS_DS2_en_excel_v2_3360262.xls?raw=true")
## New names:
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...
a <- a[-(1:3), c(1, 63)]
names(a) <- c("Pais", "sal")
a <- na.omit(a)

# Second indicator (`edu`): keep columns 1 and 3 and drop incomplete rows.
b <- import("https://github.com/Fabians099/ExamenFinal/raw/main/export%20(1).csv")
b <- b[, c(1, 3)]
names(b) <- c("Pais", "edu")
b <- na.omit(b)

# Third indicator (`mil`). The variable is named `c`, which masks base::c as
# a variable; calls such as c(1, 3) still resolve to the function.
c <- import("https://github.com/Fabians099/ExamenFinal/raw/main/export%20(2).csv")
c <- c[, c(1, 3)]
names(c) <- c("Pais", "mil")
c <- na.omit(c)

# Join the three tables on their shared column (Pais), then coerce every
# column except the first to numeric.
a <- merge(merge(a, b), c)
a[-1] <- lapply(a[-1], as.numeric)
# Three linear regressions on `a`, each taking one of edu / sal / mil as the
# response and the other two as predictors. Every fit overwrites `regresion`,
# so only the last model remains in that variable afterwards.
# The original calls passed type = "text" to summary(); summary.lm() has no
# such argument and ignored it silently, so it is dropped here.

# Model 1: edu explained by sal and mil.
modelo <- edu ~ sal + mil
regresion <- lm(modelo, data = a)
summary(regresion)
##
## Call:
## lm(formula = modelo, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7661 -0.9090 -0.1817 0.9314 4.5709
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.47832 0.38146 6.497 1.45e-09 ***
## sal 0.27174 0.04699 5.783 4.86e-08 ***
## mil 0.05593 0.08377 0.668 0.505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.44 on 135 degrees of freedom
## Multiple R-squared: 0.1987, Adjusted R-squared: 0.1868
## F-statistic: 16.73 on 2 and 135 DF, p-value: 3.221e-07

# Model 2: sal explained by edu and mil.
modelo1 <- sal ~ edu + mil
regresion <- lm(modelo1, data = a)
summary(regresion)
##
## Call:
## lm(formula = modelo1, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1289 -1.6840 -0.4144 1.4152 10.1212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.7400 0.6403 5.841 3.68e-08 ***
## edu 0.7306 0.1263 5.783 4.86e-08 ***
## mil -0.1686 0.1368 -1.232 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.361 on 135 degrees of freedom
## Multiple R-squared: 0.2049, Adjusted R-squared: 0.1932
## F-statistic: 17.4 on 2 and 135 DF, p-value: 1.891e-07

# Model 3: mil explained by edu and sal.
modelo2 <- mil ~ edu + sal
regresion <- lm(modelo2, data = a)
summary(regresion)
##
## Call:
## lm(formula = modelo2, data = a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7444 -0.8761 -0.3499 0.3717 8.8967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.05814 0.41180 4.998 1.77e-06 ***
## edu 0.05885 0.08813 0.668 0.505
## sal -0.06598 0.05354 -1.232 0.220
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.477 on 135 degrees of freedom
## Multiple R-squared: 0.01126, Adjusted R-squared: -0.003387
## F-statistic: 0.7688 on 2 and 135 DF, p-value: 0.4656