library(htmltab)
# Dependent variable: scrape the Democracy Index table from Wikipedia.
demoindex <- "https://en.wikipedia.org/wiki/Democracy_Index"
seccion2 <- '//*[@id="mw-content-text"]/div/table[2]/tbody'
demo <- htmltab(
  doc = demoindex,
  which = seccion2,
  encoding = "UTF-8"
)
# Keep only the 2nd and 3rd scraped columns and give them working names.
demo <- demo[, 2:3]
colnames(demo) <- c("Country", "demoindex")
# Index to numeric; strip horizontal/vertical whitespace around country names.
demo[["demoindex"]] <- as.numeric(demo[["demoindex"]])
demo[["Country"]] <- trimws(demo[["Country"]], whitespace = "[\\h\\v]")
# Import the data table: CIA World Factbook field 354, table id "rankOrder".
library(htmltab) # the package has to be installed first
urldead <- "https://www.cia.gov/library/publications/resources/the-world-factbook/fields/354rank.html"
deadkids <- htmltab(
  doc = urldead,
  which = '//*[@id="rankOrder"]', # XPath found with the browser developer tools
  encoding = "UTF-8"
)
## No encoding supplied: defaulting to UTF-8.
# Keep columns 2 and 3 only, rename them, and make the rate numeric.
deadkids <- deadkids[, 2:3]
colnames(deadkids) <- c("Country", "Mortalidadinf")
deadkids[["Mortalidadinf"]] <- as.numeric(deadkids[["Mortalidadinf"]])
library(htmltab) # the package has to be installed first
# Scrape the literacy-rate table from the worldatlas article.
urlanalf <- "https://www.worldatlas.com/articles/the-highest-literacy-rates-in-the-world.html"
analf <- htmltab(
  doc = urlanalf,
  which = '//*[@id="artReg-table"]/table', # XPath found with the developer tools
  encoding = "UTF-8"
)
# Drop the rank column and name the two remaining columns.
analf$Rank <- NULL
names(analf) <- c("Country", "Alfabetismo")
# The literacy column arrives as text ending in a percent sign and must become
# numeric. Strip the sign together with any whitespace in front of it: the
# former pattern " %" only matched when a space preceded the sign, so a value
# written as "99%" kept its sign and as.numeric() turned it into NA.
# NOTE(review): whether the scraped page actually contains such values was not
# verified; values already written as "99 %" convert exactly as before.
analf$Alfabetismo <- gsub("[[:space:]]*%", "", analf$Alfabetismo)
analf$Alfabetismo <- as.numeric(analf$Alfabetismo)
## Warning: NAs introduced by coercion
library(htmltab)
# CIA World Factbook rank-order page 2102 (stored as "esperanza"; presumably
# life expectancy, judging by the variable names).
urlEsperanzaVida <- "https://www.cia.gov/library/publications/the-world-factbook/rankorder/2102rank.html"
esperanza <- htmltab(
  doc = urlEsperanzaVida,
  which = '//*[@id="rankOrder"]',
  encoding = "UTF-8"
)
## No encoding supplied: defaulting to UTF-8.
# Discard the columns we do not need (keep 2 and 3) and rename what is left.
esperanza <- esperanza[, 2:3]
colnames(esperanza) <- c("Country", "AñosEstim")
esperanza[["AñosEstim"]] <- as.numeric(esperanza[["AñosEstim"]])
# Draft merge keeping every row of both tables. It is only used to spot the
# countries that would be lost in the real merge because the two data sets
# spell their names differently.
bye1 <- merge(analf, deadkids, all = TRUE)
Cambiamos de nombre a los paises de la variable Alfabetismo
# After running bye1[!complete.cases(bye1), ] we know which countries show up
# twice under different spellings; rename them in the literacy table.
# Each row number in the first vector is paired with the name at the same
# position in the second vector. Row numbers refer to the scraped row order.
analf[
  c(183, 53, 66, 137, 90, 88, 78, 70, 100,
    7, 110, 126, 132, 146, 150, 160, 21, 29),
  1
] <- c(
  "Cote d'Ivoire", "Micronesia", "South Korea", "Congo Brazzaville",
  "Venezuela", "Gaza Strip", "Brunei", "Macedonia", "Vietnam",
  "North Korea", "Bolivia", "Syria", "Iran", "Congo Kinshasa",
  "Laos", "Tanzania", "Russia", "Moldova"
)
Cambiamos de nombre a los paises de la variable Mortalidad infantil
# Harmonise country spellings in the infant-mortality table. Row numbers and
# names are paired by position; rows refer to the scraped row order.
deadkids[c(8, 16, 212, 81, 126, 75, 214, 23), 1] <- c(
  "Congo Kinshasa", "Gambia", "South Korea", "Micronesia",
  "Bahamas", "North Korea", "Czech Republic", "Congo Brazzaville"
)
Hacemos el merge teniendo en cuenta solo paises que esten en ambas datas
# Inner merge on Country: only countries present in both tables survive.
deanalf <- merge(x = analf, y = deadkids, by = "Country")
Verificamos que no se vayan paises porque tienen nombres distintos en las dos bases de datos. Para esto, creamos un merge ‘bye2’ que nos permitira ver los paises que se perderian si es que no les cambiamos de nombre.
# Draft merge keeping all rows of both inputs, used only to spot mismatches.
bye2 <- merge(deanalf, esperanza, all = TRUE)
Empleamos este código bye2[!complete.cases(bye2),] como estrategia para identificar qué países están con doble escritura
# Harmonise country spellings in esperanza (row numbers and names are paired
# by position), then merge it with deanalf on Country.
esperanza[c(11, 210, 206, 180, 56, 139, 145, 157), 1] <- c(
  "South Korea", "Congo Kinshasa", "Congo Brazzaville", "Gambia",
  "Czech Republic", "Micronesia", "Bahamas", "North Korea"
)
idhtotal <- merge(x = deanalf, y = esperanza, by = "Country")
library(htmltab)
urlEducacion <- "https://www.cia.gov/library/publications/resources/the-world-factbook/fields/369rank.html"
# Import the CIA table (field 369, table id "rankOrder").
edu <- htmltab(
  doc = urlEducacion,
  which = '//*[@id="rankOrder"]',
  encoding = "UTF-8"
)
## No encoding supplied: defaulting to UTF-8.
edu <- edu[2:3] # keep columns 2 and 3
colnames(edu) <- c("Country", "EduPBI") # name the selected columns
# Remove the "," characters and convert the column to numeric.
edu[["EduPBI"]] <- as.numeric(gsub(",", "", edu[["EduPBI"]]))
# As a precaution, strip any surrounding whitespace from the country names.
edu[["Country"]] <- trimws(edu[["Country"]], whitespace = "[\\h\\v]")
library(htmltab)
urlSalud <- "https://www.cia.gov/library/publications/resources/the-world-factbook/fields/358rank.html"
# Import the CIA table (field 358, table id "rankOrder").
salud <- htmltab(
  doc = urlSalud,
  which = '//*[@id="rankOrder"]',
  encoding = "UTF-8"
)
## No encoding supplied: defaulting to UTF-8.
# Keep columns 2 and 3, rename them, cast the value and tidy the names.
salud <- salud[2:3]
colnames(salud) <- c("Country", "SaludPBI")
salud[["SaludPBI"]] <- as.numeric(salud[["SaludPBI"]])
salud[["Country"]] <- trimws(salud[["Country"]], whitespace = "[\\h\\v]")
Merge variables Salud y Educacion
# Merge edu and salud keeping every row of both (unmatched rows get NA).
edusalud <- merge(edu, salud, all = TRUE)
VEMOS QUE PAISES ESTAN ESCRITOS DE MANERA DISTINTA
# Review the incomplete cases: rows with a missing value in any column.
edusalud[which(!complete.cases(edusalud)), ]
## Country EduPBI SaludPBI
## 3 Algeria NA 7.2
## 9 Aruba 6.1 NA
## 13 Bahamas, The NA 7.7
## 21 Bermuda 1.5 NA
## 24 Bosnia and Herzegovina NA 9.6
## 27 British Virgin Islands 6.3 NA
## 40 China NA 5.5
## 50 Curacao 4.9 NA
## 56 Dominican Republic NA 4.4
## 58 Egypt NA 5.6
## 60 Equatorial Guinea NA 3.8
## 61 Eritrea NA 3.3
## 70 Gaza Strip 5.3 NA
## 74 Greece NA 8.1
## 82 Hong Kong 3.3 NA
## 88 Iraq NA 5.5
## 97 Kiribati NA 10.2
## 99 Kuwait NA 3.0
## 106 Libya NA 5.0
## 107 Liechtenstein 2.6 NA
## 110 Macau 3.1 NA
## 111 Macedonia NA 6.5
## 118 Marshall Islands NA 17.1
## 126 Montenegro NA 6.4
## 127 Montserrat 5.1 NA
## 131 Nauru NA 3.3
## 137 Nigeria NA 3.7
## 138 Niue NA 7.4
## 142 Palau NA 9.0
## 144 Papua New Guinea NA 4.3
## 150 Puerto Rico 6.1 NA
## 161 Saudi Arabia NA 4.7
## 175 Suriname NA 5.7
## 184 Tonga NA 5.2
## 185 Trinidad and Tobago NA 5.9
## 189 Turks and Caicos Islands 3.3 NA
## 190 Tuvalu NA 16.5
## 193 United Arab Emirates NA 3.6
## 201 West Bank 5.3 NA
## 202 Yemen NA 5.6
## 203 Zambia NA 5.0
# Rename countries in the merged table (row numbers and names are paired by
# position; rows refer to the row order of edusalud after the merge).
edusalud[c(44, 43, 13, 122, 98, 69, 52), 1] <- c(
  "Congo Brazzaville", "Congo Kinshasa", "Bahamas", "Micronesia",
  "South Korea", "Gambia", "Czech Republic"
)
Cambiamos de character a numerico
# Coerce both value columns to numeric in one pass.
edusalud[c("EduPBI", "SaludPBI")] <- lapply(
  edusalud[c("EduPBI", "SaludPBI")],
  as.numeric
)
Revisamos datos incompletos
# List once more the rows that still have a missing value.
edusalud[which(!complete.cases(edusalud)), ]
## Country EduPBI SaludPBI
## 3 Algeria NA 7.2
## 9 Aruba 6.1 NA
## 13 Bahamas NA 7.7
## 21 Bermuda 1.5 NA
## 24 Bosnia and Herzegovina NA 9.6
## 27 British Virgin Islands 6.3 NA
## 40 China NA 5.5
## 50 Curacao 4.9 NA
## 56 Dominican Republic NA 4.4
## 58 Egypt NA 5.6
## 60 Equatorial Guinea NA 3.8
## 61 Eritrea NA 3.3
## 70 Gaza Strip 5.3 NA
## 74 Greece NA 8.1
## 82 Hong Kong 3.3 NA
## 88 Iraq NA 5.5
## 97 Kiribati NA 10.2
## 99 Kuwait NA 3.0
## 106 Libya NA 5.0
## 107 Liechtenstein 2.6 NA
## 110 Macau 3.1 NA
## 111 Macedonia NA 6.5
## 118 Marshall Islands NA 17.1
## 126 Montenegro NA 6.4
## 127 Montserrat 5.1 NA
## 131 Nauru NA 3.3
## 137 Nigeria NA 3.7
## 138 Niue NA 7.4
## 142 Palau NA 9.0
## 144 Papua New Guinea NA 4.3
## 150 Puerto Rico 6.1 NA
## 161 Saudi Arabia NA 4.7
## 175 Suriname NA 5.7
## 184 Tonga NA 5.2
## 185 Trinidad and Tobago NA 5.9
## 189 Turks and Caicos Islands 3.3 NA
## 190 Tuvalu NA 16.5
## 193 United Arab Emirates NA 3.6
## 201 West Bank 5.3 NA
## 202 Yemen NA 5.6
## 203 Zambia NA 5.0
Nos quedamos solo con los datos completos
# Keep only the rows with no missing values.
edusalud <- subset(edusalud, complete.cases(edusalud))
Merge de prueba para verificar que no haya países con diversas escrituras
# Draft merge keeping all rows; inspect bye3[!complete.cases(bye3), ] to see
# which countries are spelled differently and which have incomplete data.
bye3 <- merge(idhtotal, edusalud, all = TRUE)
Merge de Idh y edusalud, considerando a los casos completos y de igual escritura
# Inner merge on Country, then drop any row that still has a missing value.
idhedusalud <- merge(x = idhtotal, y = edusalud, by = "Country")
idhedusalud <- subset(idhedusalud, complete.cases(idhedusalud))
# Change "Cabo" to "Cape" in row 26; the name is needed in this form later on.
idhedusalud[26, 1] <- "Cape Verde"
Revisamos que no haya casos incompletos
# Confirm no incomplete rows remain (expected to print zero rows).
idhedusalud[which(!complete.cases(idhedusalud)), ]
## [1] Country Alfabetismo Mortalidadinf AñosEstim EduPBI
## [6] SaludPBI
## <0 rows> (or 0-length row.names)
Eliminamos todos los bye para mas orden
# Empty the three draft merges (the names stay bound, now to NULL).
bye1 <- bye2 <- bye3 <- NULL
Revisamos estructura del merge
# Inspect the structure (column names and types) of the merged table.
str(object = idhedusalud)
## 'data.frame': 159 obs. of 6 variables:
## $ Country : chr "Afghanistan" "Albania" "Andorra" "Angola" ...
## $ Alfabetismo : num 32 97 100 71 99 98 100 99 98 100 ...
## $ Mortalidadinf: num 108.5 11.6 3.6 65.8 11.7 ...
## $ AñosEstim : num 51.7 78.5 82.9 60.2 76.7 77.3 74.9 82.3 81.6 72.8 ...
## $ EduPBI : num 3.9 4 3.2 3.5 2.5 5.6 2.8 5.3 5.5 2.9 ...
## $ SaludPBI : num 8.2 8.2 8.1 3.3 5.5 4.8 4.5 9.4 11.2 6 ...
# Load the workbook that holds the press-freedom table.
library(openxlsx)
Prensa <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vR8GEwySOCwIQ5VfwNpI0Ydluoaokx4x-2y1D1bqIoZU_4WNEYsYdpXVDCEcajnVQ/pub?output=xlsx"
# Read sheet 2 of the published spreadsheet into R, starting at row 5.
LibertadPrensa <- read.xlsx(
  Prensa,
  sheet = 2,
  startRow = 5,
  skipEmptyRows = TRUE,
  skipEmptyCols = TRUE
)
LibertadPrensa <- LibertadPrensa[, c(1, 181)] # keep columns 1 and 181
colnames(LibertadPrensa) <- c("Country", "PrensaScore") # name the variables
# Convert the score to numeric.
LibertadPrensa[["PrensaScore"]] <- as.numeric(LibertadPrensa[["PrensaScore"]])
## Warning: NAs introduced by coercion
str(LibertadPrensa)
## 'data.frame': 210 obs. of 2 variables:
## $ Country : chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ PrensaScore: num 60 51 65 15 73 34 46 63 22 22 ...
# Load the workbook with the civil-liberties and political-rights table.
library(openxlsx)
noPrensa <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQjtafXVvyArEk7D4Nm_0BcnJ-lFThbS7w5NrN23eJ0G2RBOSjqPdOjkulyhbo1dQ/pub?output=xlsx"
PRightsCLiber <- read.xlsx(
  noPrensa,
  sheet = 3,
  startRow = 1,
  skipEmptyRows = TRUE,
  skipEmptyCols = TRUE
)
# Keep the variables we use: columns 1, 10 and 15.
PRightsCLiber <- PRightsCLiber[, c(1, 10, 15)]
# Split into one table per dimension, naming the columns as we go.
LibertadesCiviles <- setNames(
  PRightsCLiber[c(1, 3)],
  c("Country", "ScoreCivilLib")
)
DerechosPoliticos <- setNames(
  PRightsCLiber[c(1, 2)],
  c("Country", "ScorePolRi")
)
# Merge the two tables back together on their shared column.
LibertadesIncompleto <- merge(DerechosPoliticos, LibertadesCiviles)
Verificamos que no haya casos incompletos
# Print any row with a missing value.
LibertadesIncompleto[which(!complete.cases(LibertadesIncompleto)), ]
## [1] Country ScorePolRi ScoreCivilLib
## <0 rows> (or 0-length row.names)
# Draft merge keeping all rows; inspect bye4[!complete.cases(bye4), ] to find
# incomplete data and countries spelled two different ways.
bye4 <- merge(LibertadesIncompleto, LibertadPrensa, all = TRUE)
Renombramos los paises que tienen doble escritura
# Align the spellings in both tables (row numbers and names are paired by
# position), then merge them on their shared column.
LibertadPrensa[c(42, 155), 1] <- c("Cote d'Ivoire", "Sao Tome and Principe")
LibertadesIncompleto[c(76, 172, 173, 44), 1] <- c(
  "Hong Kong", "Saint Lucia", "Saint Vincent and the Grenadines", "Crimea"
)
Libertades <- merge(LibertadesIncompleto, LibertadPrensa)
Renombramos a Congo
# Rename the two Congo rows of the merged liberties table.
Libertades[c(39, 40), 1] <- c("Congo Brazzaville", "Congo Kinshasa")
library(htmltab)
urlPBI <- "https://www.cia.gov/library/publications/resources/the-world-factbook/fields/211rank.html"
pbi <- htmltab(
  doc = urlPBI,
  which = '//*[@id="rankOrder"]', # XPath found with the developer tools
  encoding = "UTF-8"
)
## No encoding supplied: defaulting to UTF-8.
pbi <- pbi[-c(1, 4)] # drop columns 1 and 4
colnames(pbi) <- c("Country", "PBI") # name the variables
# Remove "$" and "," from the figures, then convert them to numeric.
pbi[["PBI"]] <- as.numeric(gsub("\\$|\\,", "", pbi[["PBI"]]))
# Draft merge keeping all rows; inspect bye5[!complete.cases(bye5), ] to find
# incomplete data and countries spelled two different ways.
bye5 <- merge(Libertades, pbi, all = TRUE)
Renombramos a los paises
# Harmonise spellings in pbi (row numbers and names are paired by position)
# and in one row of Libertades, then merge the two tables.
pbi[c(161, 62, 157, 226, 57, 197, 46, 214, 189), 1] <- c(
  "Congo Brazzaville", "Bahamas", "Cape Verde", "Congo Kinshasa",
  "Czech Republic", "Gambia", "South Korea", "North Korea", "Micronesia"
)
Libertades[175, 1] <- "Gambia"
LibPBI <- merge(Libertades, pbi)
Eliminamos los borradores
# Empty the two draft merges.
bye4 <- bye5 <- NULL
# Check that the countries are spelled consistently: merge keeping all rows
# and inspect mergeVarInd[!complete.cases(mergeVarInd), ] for double spellings.
mergeVarInd <- merge(LibPBI, idhedusalud, all = TRUE)
# Then overwrite it with the merge restricted to countries in both tables.
mergeVarInd <- merge(x = LibPBI, y = idhedusalud, by = "Country")
Cambiamos la orientacion de la variable Prensa Score, ya que es contraintuitiva
# Reverse the orientation of PrensaScore: replace each value by 100 minus it.
mergeVarInd[["PrensaScore"]] <- 100 - mergeVarInd[["PrensaScore"]]
Cambiamos la orientacion de la variable Mortalidad Infantil, ya que es contraintuitiva
# Reverse the orientation of Mortalidadinf: replace each value by 110 minus it.
# NOTE(review): 110 is presumably just above the largest observed rate; confirm.
mergeVarInd[["Mortalidadinf"]] <- 110 - mergeVarInd[["Mortalidadinf"]]
En lugar de ranking, ponemos el nombre de los paises
# Label the rows with the country names.
rownames(mergeVarInd) <- mergeVarInd[["Country"]]
Hacemos merge con nuestra variable dependiente. Primero, hacemos el merge de prueba entre la data “mergeVarInd” y nuestra variable dependiente “demo”
# Draft merge keeping all rows of both inputs, used only to spot mismatches.
bye6 <- merge(demo, mergeVarInd, all = TRUE)
Empleamos bye6[!complete.cases(bye6),] para encontrar qué países están escritos de manera distinta
# Align three country spellings in demo (rows and names paired by position),
# then merge the dependent variable with the predictors on Country.
demo[c(132, 166, 114), 1] <- c(
  "Congo Brazzaville", "Congo Kinshasa", "Cote d'Ivoire"
)
datatotal <- merge(x = demo, y = mergeVarInd, by = "Country")
Eliminamos bye6
bye6 <- NULL
mergeVarInd[["Country"]] <- NULL
# Country names become the row labels of datatotal; the column is then dropped.
rownames(datatotal) <- datatotal[["Country"]]
datatotal[["Country"]] <- NULL
# Standardise columns 5 to 8 of datatotal.
VarInd_s <- scale(datatotal[5:8])
head(VarInd_s) # check the result
## PBI Alfabetismo Mortalidadinf AñosEstim
## Afghanistan -0.85578207 -2.4381676 -3.8689302 -2.3470799
## Albania -0.37428541 0.6648598 0.5452134 0.7929648
## Angola -0.63566931 -0.5763512 -1.9237916 -1.3511702
## Argentina 0.01091192 0.7125987 0.6408759 0.6523657
## Armenia -0.51185589 0.8080764 0.5133259 0.3711677
## Australia 1.36368825 0.7603375 0.8823100 1.2381950
# scale() returned a matrix; turn it back into a data frame.
VarInd_s <- as.data.frame(VarInd_s)
# Install psych only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and fails when there is no network.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# Pearson correlation matrix of the standardised variables.
pearson <- cor(VarInd_s)
pearson
## PBI Alfabetismo Mortalidadinf AñosEstim
## PBI 1.0000000 0.5328767 0.6260885 0.6882802
## Alfabetismo 0.5328767 1.0000000 0.8558475 0.7634484
## Mortalidadinf 0.6260885 0.8558475 1.0000000 0.9125011
## AñosEstim 0.6882802 0.7634484 0.9125011 1.0000000
# Plot the correlation matrix with the coefficients printed in the cells.
cor.plot(
  pearson,
  numbers = TRUE,
  upper = FALSE,
  main = "Correlation",
  show.legend = FALSE
)
KMO(VarInd_s) # look at the Overall MSA
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = VarInd_s)
## Overall MSA = 0.76
## MSA for each item =
## PBI Alfabetismo Mortalidadinf AñosEstim
## 0.89 0.81 0.69 0.74
# Parallel analysis on the correlation matrix to suggest a number of factors.
fa.parallel(
  pearson,
  fm = "pa",
  fa = "fa",
  main = "Scree Plot",
  n.obs = nrow(VarInd_s)
)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
# Two-factor model with varimax rotation; print the fitted object.
VarInd_sFA <- fa(VarInd_s, nfactors = 2, rotate = "varimax")
VarInd_sFA
## Factor Analysis using method = minres
## Call: fa(r = VarInd_s, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## PBI 0.61 0.32 0.47 0.5275 1.5
## Alfabetismo 0.43 0.84 0.89 0.1100 1.5
## Mortalidadinf 0.68 0.67 0.91 0.0879 2.0
## AñosEstim 0.89 0.46 1.00 0.0025 1.5
##
## MR1 MR2
## SS loadings 1.80 1.48
## Proportion Var 0.45 0.37
## Cumulative Var 0.45 0.82
## Proportion Explained 0.55 0.45
## Cumulative Proportion 0.55 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 3.76 with Chi Square of 502.64
## The degrees of freedom for the model are -1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 137 with the empirical chi square 0.01 with prob < NA
## The total number of observations was 137 with Likelihood Chi Square = 0.21 with prob < NA
##
## Tucker Lewis Index of factoring reliability = 1.015
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.97 0.91
## Multiple R square of scores with factors 0.95 0.83
## Minimum correlation of possible factor scores 0.90 0.66
# Show the factor loadings of the fitted model.
VarInd_sFA[["loadings"]]
##
## Loadings:
## MR1 MR2
## PBI 0.606 0.324
## Alfabetismo 0.427 0.841
## Mortalidadinf 0.678 0.673
## AñosEstim 0.887 0.459
##
## MR1 MR2
## SS loadings 1.796 1.476
## Proportion Var 0.449 0.369
## Cumulative Var 0.449 0.818
# cercania a factores
# Item complexities in ascending order.
sort(VarInd_sFA[["complexity"]])
## Alfabetismo AñosEstim PBI Mortalidadinf
## 1.482629 1.499749 1.528382 1.999885
fa.diagram(VarInd_sFA)
# Factor scores as a data frame with one named column per factor.
indices_est <- setNames(
  as.data.frame(VarInd_sFA$scores),
  c("desarrollo1FA", "desarrollo2FA")
)
# Attach the scores to the full data set, matching on row names (by = 0).
examen1 <- merge(datatotal, indices_est, by = 0)
head(examen1) # result
## Row.names demoindex ScorePolRi ScoreCivilLib PrensaScore PBI
## 1 Afghanistan 2.97 10 14 40 2000
## 2 Albania 5.98 28 40 49 12500
## 3 Angola 3.62 10 14 27 6800
## 4 Argentina 7.02 33 49 54 20900
## 5 Armenia 4.79 16 29 37 9500
## 6 Australia 9.09 40 58 78 50400
## Alfabetismo Mortalidadinf AñosEstim EduPBI SaludPBI desarrollo1FA
## 1 32 1.5 51.7 3.9 8.2 -1.19653248
## 2 97 98.4 78.5 4.0 8.2 0.73802680
## 3 71 44.2 60.2 3.5 3.3 -1.18086323
## 4 98 100.5 77.3 5.6 4.8 0.46331332
## 5 100 97.7 74.9 2.8 4.5 0.02502237
## 6 99 105.8 82.3 5.3 9.4 1.26759311
## desarrollo2FA
## 1 -2.7591208
## 2 0.3041609
## 3 -0.6231774
## 4 0.5272451
## 5 0.7683134
## 6 0.2425163
# merge() moved the row names into a Row.names column; restore them as row
# labels and drop the helper column.
rownames(examen1) <- examen1[["Row.names"]]
examen1[["Row.names"]] <- NULL
head(examen1) # result
## demoindex ScorePolRi ScoreCivilLib PrensaScore PBI
## Afghanistan 2.97 10 14 40 2000
## Albania 5.98 28 40 49 12500
## Angola 3.62 10 14 27 6800
## Argentina 7.02 33 49 54 20900
## Armenia 4.79 16 29 37 9500
## Australia 9.09 40 58 78 50400
## Alfabetismo Mortalidadinf AñosEstim EduPBI SaludPBI
## Afghanistan 32 1.5 51.7 3.9 8.2
## Albania 97 98.4 78.5 4.0 8.2
## Angola 71 44.2 60.2 3.5 3.3
## Argentina 98 100.5 77.3 5.6 4.8
## Armenia 100 97.7 74.9 2.8 4.5
## Australia 99 105.8 82.3 5.3 9.4
## desarrollo1FA desarrollo2FA
## Afghanistan -1.19653248 -2.7591208
## Albania 0.73802680 0.3041609
## Angola -1.18086323 -0.6231774
## Argentina 0.46331332 0.5272451
## Armenia 0.02502237 0.7683134
## Australia 1.26759311 0.2425163
Graficamos desarrollo1FA y desarrollo2FA
# Scatter plot of columns 11 and 12 of examen1.
plot(examen1[, 11:12])
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Same scatter with ggplot2, labelling every point with its row name.
base <- ggplot(examen1, aes(x = desarrollo1FA, y = desarrollo2FA))
base + geom_point() + geom_text(aes(label = rownames(examen1)))
# Distance matrix of the standardised variables, then a two-dimensional
# classical multidimensional scaling of it.
Var_d <- dist(VarInd_s)
VarInd_map <- cmdscale(Var_d, eig = TRUE, k = 2)
VarInd_map[["GOF"]]
## [1] 0.9297427 0.9297427
#El resultado es 0.929 en ambos indices, y estan cerca de 1, lo que indica que el mapa en dos dimensiones representa bien las distancias entre los casos
Queremos saber las distancias entre nuestros casos y qué tipos de vecindarios se forman
titulo <- "Mapa de similitudes entre paises"
# Coordinates of each case on the two scaling dimensions.
x <- VarInd_map[["points"]][, 1]
y <- VarInd_map[["points"]][, 2]
# Empty plot (type = "n") so that only the text labels are drawn.
plot(
  x, y,
  xlab = "Dimension 1",
  ylab = "Dimension 2",
  main = titulo,
  type = "n"
)
text(x, y, labels = rownames(VarInd_map[["points"]]), cex = 0.5)
VarInd_map_DF <- as.data.frame(VarInd_map[["points"]])
# Install ggrepel only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and fails when there is no network.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library(ggrepel)
# Map of the scaled points with non-overlapping row-name labels.
base <- ggplot(VarInd_map_DF, aes(x = V1, y = V2))
base + geom_point() +
  geom_text_repel(aes(label = row.names(VarInd_map_DF)), size = 2.6)
Para realizar mi modelo, necesitaré factorizar las variables de libertades
# Standardise columns 2 to 4 of datatotal.
examen2_s <- scale(datatotal[2:4])
head(examen2_s) # check the result
## ScorePolRi ScoreCivilLib PrensaScore
## Afghanistan -1.1389301 -1.3849946 -0.45844197
## Albania 0.2759826 0.2319673 -0.06644547
## Angola -1.1389301 -1.3849946 -1.02465913
## Argentina 0.6690139 0.7916849 0.15133036
## Armenia -0.6672925 -0.4521320 -0.58910747
## Australia 1.2192577 1.3514025 1.19665435
# scale() returned a matrix; turn it back into a data frame.
examen2_s <- as.data.frame(examen2_s)
# Install psych only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and fails when there is no network.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# Pearson correlation matrix of the standardised liberties variables.
pearson2 <- cor(examen2_s)
pearson2
## ScorePolRi ScoreCivilLib PrensaScore
## ScorePolRi 1.0000000 0.9616109 0.9227345
## ScoreCivilLib 0.9616109 1.0000000 0.9452193
## PrensaScore 0.9227345 0.9452193 1.0000000
# Plot the correlation matrix with the coefficients printed in the cells.
cor.plot(
  pearson2,
  numbers = TRUE,
  upper = FALSE,
  main = "Correlation",
  show.legend = FALSE
)
KMO(examen2_s) # look at the Overall MSA
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = examen2_s)
## Overall MSA = 0.76
## MSA for each item =
## ScorePolRi ScoreCivilLib PrensaScore
## 0.77 0.69 0.84
# Parallel analysis on the correlation matrix to suggest a number of factors.
fa.parallel(
  pearson2,
  fm = "pa",
  fa = "fa",
  main = "Scree Plot",
  n.obs = nrow(examen2_s)
)
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
# Fit a one-factor model on the standardised liberties variables.
examen2_sFA <- fa(
  examen2_s,
  nfactors = 1,
  rotate = "varimax"
)
# Print the model that was just fitted. The script used to print VarInd_sFA
# here (the two-factor model fitted earlier), so the output pasted below this
# line was a verbatim copy of that other model's summary. It has been removed;
# re-run the script to obtain the summary of examen2_sFA.
examen2_sFA
# Show the factor loadings of the one-factor model.
examen2_sFA[["loadings"]]
##
## Loadings:
## MR1
## ScorePolRi 0.969
## ScoreCivilLib 0.992
## PrensaScore 0.952
##
## MR1
## SS loadings 2.831
## Proportion Var 0.944
fa.diagram(examen2_sFA) # good index
# Factor scores as a one-column data frame named LibertadesFA.
indices_est2 <- setNames(
  as.data.frame(examen2_sFA$scores),
  c("LibertadesFA")
)
# Attach the scores to the full data set, matching on row names (by = 0).
examen2 <- merge(datatotal, indices_est2, by = 0)
head(examen2) # result
## Row.names demoindex ScorePolRi ScoreCivilLib PrensaScore PBI
## 1 Afghanistan 2.97 10 14 40 2000
## 2 Albania 5.98 28 40 49 12500
## 3 Angola 3.62 10 14 27 6800
## 4 Argentina 7.02 33 49 54 20900
## 5 Armenia 4.79 16 29 37 9500
## 6 Australia 9.09 40 58 78 50400
## Alfabetismo Mortalidadinf AñosEstim EduPBI SaludPBI LibertadesFA
## 1 32 1.5 51.7 3.9 8.2 -1.2466525
## 2 97 98.4 78.5 4.0 8.2 0.2075030
## 3 71 44.2 60.2 3.5 3.3 -1.3097270
## 4 98 100.5 77.3 5.6 4.8 0.7033659
## 5 100 97.7 74.9 2.8 4.5 -0.5067541
## 6 99 105.8 82.3 5.3 9.4 1.3184587
# merge() moved the row names into a Row.names column; restore them as row
# labels and drop the helper column.
rownames(examen2) <- examen2[["Row.names"]]
examen2[["Row.names"]] <- NULL
head(examen2) # result
## demoindex ScorePolRi ScoreCivilLib PrensaScore PBI
## Afghanistan 2.97 10 14 40 2000
## Albania 5.98 28 40 49 12500
## Angola 3.62 10 14 27 6800
## Argentina 7.02 33 49 54 20900
## Armenia 4.79 16 29 37 9500
## Australia 9.09 40 58 78 50400
## Alfabetismo Mortalidadinf AñosEstim EduPBI SaludPBI
## Afghanistan 32 1.5 51.7 3.9 8.2
## Albania 97 98.4 78.5 4.0 8.2
## Angola 71 44.2 60.2 3.5 3.3
## Argentina 98 100.5 77.3 5.6 4.8
## Armenia 100 97.7 74.9 2.8 4.5
## Australia 99 105.8 82.3 5.3 9.4
## LibertadesFA
## Afghanistan -1.2466525
## Albania 0.2075030
## Angola -1.3097270
## Argentina 0.7033659
## Armenia -0.5067541
## Australia 1.3184587
Planteamos que a mayores libertades mayor democracia
# List the column names available for the model.
colnames(examen2)
## [1] "demoindex" "ScorePolRi" "ScoreCivilLib" "PrensaScore"
## [5] "PBI" "Alfabetismo" "Mortalidadinf" "AñosEstim"
## [9] "EduPBI" "SaludPBI" "LibertadesFA"
Planteamos el siguiente modelo: A mas libertades (LibertadesFA, el indice factorial construido con ScorePolRi, ScoreCivilLib y PrensaScore) mayor democracia (demoindex)
# Simple linear regression of demoindex on LibertadesFA; summary() then shows
# which coefficients are significant.
modelo <- lm(formula = demoindex ~ LibertadesFA, data = examen2)
summary(modelo)
##
## Call:
## lm(formula = demoindex ~ LibertadesFA, data = examen2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.32681 -0.39961 -0.01845 0.47060 2.43376
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.75985 0.06367 90.46 <2e-16 ***
## LibertadesFA 2.04140 0.06425 31.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7452 on 135 degrees of freedom
## Multiple R-squared: 0.882, Adjusted R-squared: 0.8812
## F-statistic: 1009 on 1 and 135 DF, p-value: < 2.2e-16
Grafiquemos matematicamente y = 5.76 + 2.04(x)
Asimismo, se concluye que cada punto menos o extra en LibertadesFA hace que el indice de democracia (demoindex) varie, en promedio, en 2.04 puntos
Se asume relación lineal entre Y y Xs:
# Diagnostic plot 1 of the model: the red line should be close to horizontal.
plot(modelo, which = 1)
-Si hay linealidad
# Diagnostic plot 3 of the model: the red line should be close to horizontal.
plot(modelo, which = 3)
-Sigue siendo valido
# Install lmtest only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and fails when there is no network.
if (!requireNamespace("lmtest", quietly = TRUE)) {
  install.packages("lmtest")
}
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Breusch-Pagan test; the null hypothesis is that the model is homoscedastic.
bptest(modelo)
##
## studentized Breusch-Pagan test
##
## data: modelo
## BP = 1.6308, df = 1, p-value = 0.2016
# Diagnostic plot 2 of the model: the points should lie close to the diagonal.
plot(modelo, which = 2)
# Shapiro-Wilk normality test on the model residuals.
shapiro.test(modelo$residuals)
##
## Shapiro-Wilk normality test
##
## data: modelo$residuals
## W = 0.98528, p-value = 0.1493
# Diagnostic plot 5 of the model, used to look for influential observations.
plot(modelo, which = 5)
Gracias