We load the libraries
library(rio)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
FIRST WE BUILD OUR DEPENDENT VARIABLE: infections per population
We bring in the infection data
data_covid= "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
# We need to pull the per-day date columns into rows
WorldData<-import(file = data_covid)%>%
mutate(type="datacon")%>%
tidyr::gather(Fecha,Valor,-c(type,"Province/State",
"Country/Region",Lat,Long)) # gathering the separate date columns into a single Fecha/Valor pair
WorldData= WorldData%>%
filter(Valor>0)
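To see what gather() does here, a minimal toy example (made-up values) mimicking the wide layout of the JHU file, where each date is its own column; gathering yields one row per country-day:
toy = data.frame(`Country/Region` = c("Peru","Chile"),
                 Lat = c(-9.2, -35.7), Long = c(-75.0, -71.5),
                 `1/22/20` = c(0, 0), `1/23/20` = c(1, 2),
                 check.names = FALSE)
tidyr::gather(toy, Fecha, Valor, -c(`Country/Region`, Lat, Long))
##   Country/Region   Lat  Long   Fecha Valor
## 1           Peru  -9.2 -75.0 1/22/20     0
## 2          Chile -35.7 -71.5 1/22/20     0
## 3           Peru  -9.2 -75.0 1/23/20     1
## 4          Chile -35.7 -71.5 1/23/20     2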
We convert to Date format
WorldData$Fecha=mdy(WorldData$Fecha)
WorldData$Fecha=as.Date(WorldData$Fecha) # mdy() already returns a Date, so this line is a harmless no-op
We rename the country column
names(WorldData)[2]="Country"
We aggregate provinces into countries
WorldData=aggregate(Valor
~ Country + Fecha,
data = WorldData,
sum)
We fix Egypt's name
WorldData$Country=gsub('Egypt',"Egypt, Arab Rep.",WorldData$Country)
A necessary aside to get each country's code. Now we need to attach the code to each country and keep only that
link1="https://github.com/CarlosGDiez/BasesLimpias/raw/master/Gee_sucio.csv"
oto=import(link1)
oto = oto[,c(1,2)]
names(oto) = c("Country","CODE")
oto=oto[!duplicated(oto), ]
We compute day 100 (the 100th day with reported cases in each country)
Dia100=WorldData%>%
group_by(Country)%>%
mutate(dia100= ifelse(Fecha==nth(Fecha,100),1,0))%>%
filter(dia100==1)
# We merge with oto to attach the country code
Dia100=merge(oto,Dia100, by.x = 'Country', by.y='Country')
# We keep only the code, the date and the value
Dia100=Dia100[,c(2:4)]
# We give the columns proper names
names(Dia100)[2] = "Fecha100"
names(Dia100)[3] = "Valor100"
# Merge key that will be useful later (country code + date)
Dia100$DIA100=paste(Dia100$CODE,Dia100$Fecha100)
We compute day 7
Dia7=WorldData%>%
group_by(Country)%>%
mutate(dia7= ifelse(Fecha==nth(Fecha,7),1,0))%>%
filter(dia7==1)
# We merge with oto to attach the country code
Dia7=merge(oto,Dia7, by.x = 'Country', by.y='Country')
# We keep only the code, the date and the value
Dia7=Dia7[,c(2:4)]
# We give the columns proper names
names(Dia7)[2] = "Fecha7"
names(Dia7)[3] = "Valor7"
# Merge key that will be useful later (country code + date)
Dia7$DIA7=paste(Dia7$CODE,Dia7$Fecha7)
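DIA7 (and DIA100 above) are composite merge keys: pasting the country code and the date produces one string that uniquely identifies a country-day, so later tables can be matched on a single column. A minimal sketch with a made-up code and date:
paste("PER", as.Date("2020-03-12"))
## [1] "PER 2020-03-12"
# Any table carrying the same CODE and Date can rebuild this key and be merged
# on it, which is exactly what Rigurosidad, dataic and dataayuda do below.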
Now we can work on WorldData separately
WorldData=merge(oto,WorldData, by.x = 'Country', by.y='Country')
WorldData$Country = NULL
A necessary aside to load more libraries
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse
## The following object is masked from 'package:base':
##
## isFALSE
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(cluster)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
library(dbscan)
library(descr)
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
## The following object is masked from 'package:car':
##
## Recode
## The following object is masked from 'package:BBmisc':
##
## %nin%
library(foreign)
library(fpc)
##
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
##
## dbscan
library(ggcorrplot)
## Loading required package: ggplot2
library(GPArotation)
library(haven)
library(htmltab)
library(jsonlite)
library(matrixcalc)
library(nFactors)
## Loading required package: lattice
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
library(nortest)
library(parameters)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:rio':
##
## export
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(PMCMRplus)
library(polycor)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following objects are masked from 'package:DescTools':
##
## AUC, ICC, SD
## The following object is masked from 'package:car':
##
## logit
library(readr)
library(readxl)
library(rio)
library(see)
library(stringi)
library(stringr)
library(tidyr)
library(tidyverse)
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.4 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x lubridate::as.difftime() masks base::as.difftime()
## x data.table::between() masks dplyr::between()
## x BBmisc::coalesce() masks dplyr::coalesce()
## x BBmisc::collapse() masks dplyr::collapse()
## x lubridate::date() masks base::date()
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x data.table::first() masks dplyr::first()
## x purrr::flatten() masks jsonlite::flatten()
## x data.table::hour() masks lubridate::hour()
## x lubridate::intersect() masks base::intersect()
## x data.table::isoweek() masks lubridate::isoweek()
## x dplyr::lag() masks stats::lag()
## x data.table::last() masks dplyr::last()
## x data.table::mday() masks lubridate::mday()
## x data.table::minute() masks lubridate::minute()
## x data.table::month() masks lubridate::month()
## x data.table::quarter() masks lubridate::quarter()
## x car::recode() masks dplyr::recode()
## x data.table::second() masks lubridate::second()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::some() masks car::some()
## x purrr::transpose() masks data.table::transpose()
## x lubridate::union() masks base::union()
## x data.table::wday() masks lubridate::wday()
## x data.table::week() masks lubridate::week()
## x data.table::yday() masks lubridate::yday()
## x data.table::year() masks lubridate::year()
library(Rmisc)
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
We bring in the population data for each country
linkedin = "https://github.com/AriannaNKZC/Estad-2/raw/master/%C2%BFSera%20la%20data%3F.xls"
poblacion = import(linkedin)
We keep only the columns we need
poblacion = poblacion[,c(1,2,64)]
We name the columns
names(poblacion)= c("Country", "CODE", "pobla")
Now we combine the infection data with the population data
WorldData=merge(poblacion,WorldData, by.x = 'CODE', by.y='CODE')
NOW WE WORK ON THE INDEPENDENT VARIABLES
A necessary aside to get each country's code in Spanish. Now we need to attach the code to each country and keep only that. We bring in the database
CODESPAÑOL<- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
CDSP=import(CODESPAÑOL)
We keep only the rows and columns we need
names(CDSP)=(CDSP[1,])
CDSP = CDSP[-1,]
CDSP = CDSP[,c(1,2)]
We name the columns
names(CDSP) = c("PAIS", "CODE")
We bring in the data
data_ppp <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_NY.GDP.PCAP.CD_DS2_es_csv_v2_1347337.csv"
ppp_pib =import(data_ppp)
We keep only the rows and columns we need
names(ppp_pib)=(ppp_pib[1,])
ppp_pib = ppp_pib[-1,]
ppp_pib = ppp_pib[,c(2,63)]
We name the columns
names(ppp_pib) = c("CODE", "PPP_2018")
We bring in the data (THE SAME ONE USED TO CREATE oto)
GEE=import(link1)
We name the columns
names(GEE) = c("Country","CODE","Series", "SC", "GEE")
We keep only the rows and columns we need
# Filter to keep the GEE estimate and not its standard error
GEE=GEE%>%
group_by(Country)%>%
mutate(Index = ifelse(Series==nth(Series,1), 1, 0))%>%
filter(Index==1)
# We drop empty rows
GEE=GEE[-c(215,216,217,218,219),]
## Warning: The `i` argument of ``[.tbl_df`()` must lie in [-rows, 0] if negative, as of tibble 3.0.0.
## Use `NA_integer_` as row index to obtain a row full of `NA` values.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Keep only the necessary columns
GEE = GEE[,c(2,5)]
We bring in the data
link2="https://github.com/CarlosGDiez/BasesLimpias/blob/master/Rigurosidad.csv?raw=true"
Rigurosidad=import(link2)
We keep only the rows and columns we need
Rigurosidad=Rigurosidad[, c(1,2,5,35)]
We name the columns
names(Rigurosidad) = c("Country", "CODE","Date","Rigurosidad")
We need to order and join them by date
Rigurosidad$Date <- ymd(Rigurosidad$Date)
We create the merge key
Rigurosidad$DIA7=paste(Rigurosidad$CODE,Rigurosidad$Date)
We keep only the information from one week into the outbreak
Rigurosidad=merge(Rigurosidad,Dia7, by.x="DIA7", by.y = "DIA7")
Once again, we keep only the rows and columns we need
Rigurosidad=Rigurosidad[,c(3,5)]
We give the columns proper names
names(Rigurosidad) = c("CODE","Rigurosidad")
We bring in the data
infocamp = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/public-campaigns-covid.csv"
dataic=import(infocamp)
We need to order and join them by date
dataic$Date <- ymd(dataic$Date)
We create the merge key
dataic$DIA7=paste(dataic$Code,dataic$Date)
We keep only the information from one week into the outbreak
dataic=merge(dataic,Dia7, by.x="DIA7", by.y = "DIA7")
Once again, we keep only the rows and columns we need
dataic=dataic[,c(5,6)]
We bring in the data
xurb = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/API_SP.URB.TOTL.IN.ZS_DS2_es_csv_v2_1347951.csv"
dataxurb=import(xurb)
We rearrange the column names
names(dataxurb)=(dataxurb[1,])
We keep only the rows and columns we need
dataxurb=dataxurb[,c(2,64)]
dataxurb=dataxurb[-1,]
We give the columns proper names
names(dataxurb) = c("CODE","Poburbana")
We fix the row names
dataxurb$num=c(1:264)
rownames(dataxurb)=dataxurb[,3]
dataxurb[,3]= NULL
We round
dataxurb$Poburbana=round(dataxurb$Poburbana, digits = 2)
We bring in the data
LIDH="https://github.com/CarlaMendozaE/Prueba/blob/master/IDH.xlsx?raw=true"
IDH=import(LIDH)
We keep only the rows and columns we need
IDH[,c(1,8,9)]=NULL
We name the columns
names(IDH) = c("Country","HDI","EXPECTATIVAVIDA","EXPECTCOLE","YEARS_SCHOOLING","GNI_GROSSNATIONALINCOME")
IDH$Country=gsub('Egypt',"Egypt, Arab Rep.",IDH$Country)
We convert to numeric
IDH[,c(2:6)]=lapply(IDH[,c(2:6)], as.numeric)
## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion
We round
IDH[2:6]=round(IDH[,2:6], digits = 2)
We add CODE
IDH=merge(oto,IDH, by.x = 'Country', by.y='Country')
We bring in the data
linkayuda="https://raw.githubusercontent.com/CarlosGDiez/BasesLimpias/master/Rigurosidad.csv"
dataayuda=import(linkayuda)
We keep only the rows and columns we need
dataayuda = dataayuda[,c(2,5, 21)]
#USA
dataayuda <- dataayuda[-c(48601 :62640), ]
#UK
dataayuda <- dataayuda[-c(16741 :17820), ]
We name the columns
names(dataayuda) = c("CODE","Date","Ayuda Económica")
We need to order and join them by date
dataayuda$Date <- ymd(dataayuda$Date)
We create the merge key
dataayuda$DIA7=paste(dataayuda$CODE,dataayuda$Date)
We keep only the information from one week into the outbreak
dataayuda=merge(dataayuda,Dia7, by.x="DIA7", by.y = "DIA7")
Once again, we keep only the rows and columns we need
dataayuda = dataayuda[,c(2,4)]
We rename CODE properly
names(dataayuda)[1] = "CODE"
We bring in the data
linkdensidad="https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/Excel%20densidad.xlsx.xls"
datadensidad=import(linkdensidad)
## New names:
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...
We rearrange the column names
names(datadensidad)=(datadensidad[3,])
We keep only the rows and columns we need
datadensidad = datadensidad[,c(2, 63)]
datadensidad <- datadensidad[-c(1:3),]
We name the columns
names(datadensidad) = c("CODE","Densidadpob")
We convert to numeric
datadensidad$Densidadpob=as.numeric(datadensidad$Densidadpob)
We round
datadensidad$Densidadpob=round(datadensidad$Densidadpob, digits = 2)
We bring in the data
datadesempleo <- "https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/datadesempleooriginal.csv"
datadesempleo=import(datadesempleo)
We name the columns
names(datadesempleo)= c("PAIS", "Tasadesempleo")
We standardize country names to the World Bank Spanish spellings so the merge with CDSP works
datadesempleo$PAIS=gsub("Egipto","Egipto, República Árabe de",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Benín","Benin",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Bahráin","Bahrein",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Bosnia y Hercegovina","Bosnia y Herzegovina",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Bután","Bhután",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Botsuana","Botswana",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Kazajistán","Kazajstán",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Kenia","Kenya",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Lesoto","Lesotho",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Malaui","Malawi",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Nueva Zelanda","Nueva Zelandia",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Ruanda","Rwanda",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Arabia Saudí","Arabia Saudita",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Surinam","Suriname",datadesempleo$PAIS)
datadesempleo$PAIS=gsub("Zimbabue","Zimbabwe",datadesempleo$PAIS)
We add CODE
datadesempleo=merge(CDSP,datadesempleo, by.x = 'PAIS', by.y='PAIS')
We bring in the data
perro = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/258c45e7-1b68-4b8e-853d-a2554f1bb145_Data.csv"
regulatory = import(perro)
We keep only the rows and columns we need
regulatory=regulatory[, c(2,5)]
We name the columns
names(regulatory) = c("CODE","Regulatory_quality")
We convert to numeric
regulatory$Regulatory_quality=as.numeric(regulatory$Regulatory_quality)
## Warning: NAs introduced by coercion
We round
regulatory$Regulatory_quality=round(regulatory$Regulatory_quality, digits = 2)
We bring in the data
gato= "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/51253f2e-7374-408f-8685-c729a64d043a_Data.csv"
control_co = import(gato)
We keep only the rows and columns we need
control_co=control_co[, c(2,5)]
We name the columns
names(control_co) = c("CODE","Control_co")
We convert to numeric
control_co$Control_co=as.numeric(control_co$Control_co)
## Warning: NAs introduced by coercion
We round
control_co$Control_co=round(control_co$Control_co, digits = 2)
We bring in the data
AXA = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/a9249c7d-95ab-4618-9160-3a247dea2bae_Data.csv"
ruleof = import(AXA)
We keep only the rows and columns we need
ruleof=ruleof[, c(2,5)]
We name the columns
names(ruleof) = c("CODE","Ruleoflaw")
We convert to numeric
ruleof$Ruleoflaw=as.numeric(ruleof$Ruleoflaw)
## Warning: NAs introduced by coercion
We round
ruleof$Ruleoflaw=round(ruleof$Ruleoflaw, digits = 2)
We bring in the data
VA = 'https://github.com/AriannaNKZC/Estad-2/raw/master/Voice_and_accountability.csv'
VocA = import(VA)
We keep only the rows and columns we need
VocA=VocA[, c(2,5)]
We name the columns
names(VocA) = c("CODE","Voice_acco")
We convert to numeric
VocA$Voice_acco=as.numeric(VocA$Voice_acco)
## Warning: NAs introduced by coercion
We round
VocA$Voice_acco=round(VocA$Voice_acco, digits = 2)
We bring in the data
PS='https://github.com/AriannaNKZC/Estad-2/raw/master/e0757e7a-8829-44d2-a7a3-11a580c19a53_Data.csv'
PolS = import(PS)
We keep only the rows and columns we need
PolS=PolS[, c(2,5)]
We name the columns
names(PolS) = c("CODE","Political_sta")
We convert to numeric
PolS$Political_sta=as.numeric(PolS$Political_sta)
## Warning: NAs introduced by coercion
We round
PolS$Political_sta=round(PolS$Political_sta, digits = 2)
WE MERGE ALL THE VARIABLES INTO A SINGLE DATAFRAME
Data=merge(PolS,VocA, by.x = 'CODE', by.y='CODE')
Data=merge(Data,ruleof, by.x = 'CODE', by.y='CODE')
Data=merge(Data,control_co, by.x = 'CODE', by.y='CODE')
Data=merge(Data,regulatory, by.x = 'CODE', by.y='CODE')
Data=merge(Data,datadesempleo, by.x = 'CODE', by.y='CODE')
Data=merge(Data,datadensidad, by.x = 'CODE', by.y='CODE')
Data=merge(Data,dataayuda, by.x = 'CODE', by.y='CODE')
Data=merge(Data,IDH, by.x = 'CODE', by.y='CODE')
Data=merge(Data,dataxurb, by.x = 'CODE', by.y='CODE')
Data=merge(Data,dataic, by.x = 'CODE', by.y='CODE')
Data=merge(Data,Rigurosidad, by.x = 'CODE', by.y='CODE')
Data=merge(Data,GEE, by.x = 'CODE', by.y='CODE')
Data=merge(Data,ppp_pib, by.x = 'CODE', by.y='CODE')
Data=merge(Data,Dia100, by.x = 'CODE', by.y='CODE')
Data=merge(Data,poblacion, by.x = 'CODE', by.y='CODE')
We clean up
# Drop columns we no longer need
Data=Data[,c(-7,-24,-25)]
# Drop duplicated rows
Data = Data[!duplicated(Data),]
We give the columns proper names
names(Data)[10] = "Country"
names(Data)[17] = "infoalawk"
We fix the numeric variable
Data$GEE=as.numeric(Data$GEE)
## Warning: NAs introduced by coercion
We round
Data$GEE=round(Data$GEE, digits = 2)
Data$PPP_2018=round(Data$PPP_2018, digits = 2)
We fix the ordinal variables
# Ayuda Económica (economic support)
Data$`Ayuda Económica`= as.ordered(Data$`Ayuda Económica`)
levels(Data$`Ayuda Económica`) = c("Sin apoyo", "Menos del 50% del sueldo")
table(Data$`Ayuda Económica`)
##
## Sin apoyo Menos del 50% del sueldo
## 121 8
# Campañas informativas (information campaigns)
Data$infoalawk = as.ordered(Data$infoalawk)
levels(Data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
table(Data$infoalawk)
##
## Ninguna Campañas del gobierno Campañas integrales
## 17 19 93
We drop the NAs
Data=na.omit(Data)
Final cleanup of the Data
names(Data)
## [1] "CODE" "Political_sta"
## [3] "Voice_acco" "Ruleoflaw"
## [5] "Control_co" "Regulatory_quality"
## [7] "Tasadesempleo" "Densidadpob"
## [9] "Ayuda Económica" "Country"
## [11] "HDI" "EXPECTATIVAVIDA"
## [13] "EXPECTCOLE" "YEARS_SCHOOLING"
## [15] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [17] "infoalawk" "Rigurosidad"
## [19] "GEE" "PPP_2018"
## [21] "Fecha100" "Valor100"
## [23] "pobla"
Data$Valor100 = (Data$Valor100/Data$pobla)*100 # cases per 100 inhabitants at day 100
rownames(Data) = Data$Country
Data$Country = NULL
Data$CODE = NULL
Data$Fecha100 = NULL
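Note the scale of the resulting dependent variable: cumulative cases at day 100, divided by population and multiplied by 100, i.e. cases per 100 inhabitants. A quick check with made-up numbers:
# Hypothetical country: 50,000 cases at day 100, population 10 million
(50000/10000000)*100
## [1] 0.5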
IN THIS PART WE GO THROUGH THE VARIABLES BY CATEGORY
str(Data$`Ayuda Económica`)
## Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
Mode(Data$`Ayuda Económica`)
## [1] Sin apoyo
## attr(,"freq")
## [1] 119
## Levels: Sin apoyo < Menos del 50% del sueldo
Median(Data$`Ayuda Económica`, na.rm = TRUE) # median: Sin apoyo
## [1] Sin apoyo
## Levels: Sin apoyo < Menos del 50% del sueldo
IQR(Data$`Ayuda Económica`) # 0
## [1] 0
pie(table(Data$`Ayuda Económica`), main="Gráfico 1: Apoyo a través de ingresos contexto Covid-19", col = c("mediumpurple1", "purple", "lightslateblue"))
str(Data$infoalawk)
## Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
Mode(Data$infoalawk) # mode: Campañas integrales
## [1] Campañas integrales
## attr(,"freq")
## [1] 90
## Levels: Ninguna < Campañas del gobierno < Campañas integrales
Median(Data$infoalawk) # median: Campañas integrales
## [1] Campañas integrales
## Levels: Ninguna < Campañas del gobierno < Campañas integrales
IQR(Data$infoalawk)
## [1] 1
library(ggplot2)
pie(table(Data$infoalawk), main="Gráfico 2: Campañas informativas del Covid-19", col = c("mediumpurple1", "purple", "lightslateblue"))
str(Data$Rigurosidad)
## num [1:126] 27.78 33.33 81.48 0 2.78 ...
summary(Data$Rigurosidad)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 11.11 22.22 31.11 50.70 93.52
sd(Data$Rigurosidad)
## [1] 25.83322
hist(Data$Rigurosidad, col = "royalblue1", main = "Gráfico 3: Índice de rigurosidad en medidas tempranas según países", xlab = "Índice de rigurosidad", ylab ="Número de países")
str(Data$`Densidadpob`)
## num [1:126] 56.9 24.7 104.6 163.8 135.6 ...
summary(Data$`Densidadpob`)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.04 30.55 89.42 213.91 185.29 7953.00
Mode(Data$`Densidadpob`)
## [1] NA
## attr(,"freq")
## [1] 1
sd(Data$`Densidadpob`, na.rm = TRUE)
## [1] 734.7614
mis.colores = colorRampPalette( c( "lightslateblue","cyan1"))
hist(Data$`Densidadpob`, col = mis.colores(14), main = "Gráfico 4: Densidad de población por kilómetro cuadrado", xlab = "Número de personas por kilómetro cuadrado", ylab = "Número de países")
str(Data$`Poburbana`)
## num [1:126] 25.8 66.2 61.2 88 86.8 ...
summary(Data$`Poburbana`)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.52 47.27 63.70 62.23 80.53 100.00
sd(Data$`Poburbana`)
## [1] 21.95835
Mode(Data$`Poburbana`)
## [1] 55.98 100.00
## attr(,"freq")
## [1] 2
hist(Data$`Poburbana`, col = mis.colores(14), main = "Gráfico 5: Población urbana según países", xlab = "Porcentaje de población urbana", ylab = "Número de países")
str(Data$HDI)
## num [1:126] 0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
summary(Data$HDI)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3800 0.6200 0.7600 0.7303 0.8500 0.9500
sd(Data$HDI, na.rm = TRUE)
## [1] 0.1508957
Mode(Data$HDI)
## [1] 0.76
## attr(,"freq")
## [1] 7
boxplot(Data$HDI, col = "plum1", main = "Gráfico 6: Índice de Desarrollo Humano")
summary(Data$PPP_2018)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 381.3 2075.1 7093.7 16801.4 20013.4 116654.3
sd(Data$PPP_2018, na.rm = TRUE)
## [1] 22170.64
Mode(Data$PPP_2018, na.rm = TRUE)
## [1] NA
## attr(,"freq")
## [1] 1
mis.colores1 = colorRampPalette( c( "plum", "mediumpurple1","mediumpurple2", "plum1", "plum2"))
boxplot(Data$PPP_2018, col = mis.colores1(14), main = "Gráfico 7: PBI per cápita según el precio del dólar", xlab = "PPP 2018", ylab = NULL )
summary(Data$Tasadesempleo)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 4.00 7.00 10.20 11.75 77.00
sd(Data$Tasadesempleo, na.rm = TRUE)
## [1] 10.41769
Mode(Data$Tasadesempleo, na.rm = TRUE)
## [1] 6
## attr(,"freq")
## [1] 14
boxplot(Data$Tasadesempleo, col = "plum1", main = "Gráfico 8: Porcentaje de desempleo en el 2018")
summary(Data$Political_sta)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.65000 -0.65750 -0.08500 -0.07762 0.64000 1.66000
sd(Data$Political_sta, na.rm = TRUE)
## [1] 0.9378438
Mode(Data$Political_sta, na.rm = TRUE)
## [1] -0.92 -0.83 -0.78 -0.55 -0.54 -0.35 -0.24 -0.23 -0.10 -0.08 0.06 0.11
## [13] 0.12 0.31 0.46 0.52 0.53 0.64 0.70 0.82 1.01 1.05 1.09
## attr(,"freq")
## [1] 2
mis.colores1 = colorRampPalette( c( "plum", "mediumpurple1","mediumpurple2", "plum1", "plum2"))
hist(Data$Political_sta, col = mis.colores1(14), main = "Gráfico 9: Estabilidad política según países", xlab = "Estabilidad Política", ylab = "Número de países" )
summary(Data$Ruleoflaw)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.85000 -0.58000 -0.13000 0.07579 0.67250 2.02000
sd(Data$Ruleoflaw, na.rm = TRUE)
## [1] 0.964479
Mode(Data$Ruleoflaw, na.rm = TRUE)
## [1] -0.43
## attr(,"freq")
## [1] 4
mis.colores1 = colorRampPalette( c( "plum", "mediumpurple1","mediumpurple2", "plum1", "plum2"))
hist(Data$Ruleoflaw, col = mis.colores1(14), main = "Gráfico 10: Imperio de la ley según países", xlab = "Imperio de la ley", ylab = "Número de países" )
str(Data$GEE)
## num [1:126] -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
summary(Data$GEE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9100 -0.6100 -0.0050 0.1101 0.6525 2.2300
sd(Data$GEE)
## [1] 0.961738
Mode(Data$GEE)
## [1] 0.11
## attr(,"freq")
## [1] 4
hist(Data$GEE, col = mis.colores1(14), main = "Gráfico 11: Índice de Efectividad de la Gobernanza según países", xlab = "Índice de Efectividad de la Gobernanza", ylab = "Número de países" )
str(Data$Voice_acco)
## num [1:126] -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
summary(Data$Voice_acco)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.83000 -0.66000 0.12000 0.09802 0.89500 1.69000
sd(Data$Voice_acco)
## [1] 0.9142315
Mode(Data$Voice_acco)
## [1] 0.03 0.53
## attr(,"freq")
## [1] 3
hist(Data$Voice_acco, col = mis.colores1(14), main = "Gráfico 12: Voz y rendición de cuentas según países", xlab = "Voz y rendición de cuentas", ylab = "Número de países" )
str(Data$Control_co)
## num [1:126] -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
summary(Data$Control_co)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.60000 -0.70750 -0.19000 0.06984 0.75000 2.17000
sd(Data$Control_co)
## [1] 1.01238
Mode(Data$Control_co)
## [1] -0.32
## attr(,"freq")
## [1] 4
hist(Data$Control_co, col = mis.colores1(14), main = "Gráfico 13: Control de la corrupción según países", xlab = "Control de la corrupción", ylab = "Número de países" )
str(Data$Regulatory_quality)
## num [1:126] -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
summary(Data$Regulatory_quality)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.3500 -0.5700 -0.0600 0.1278 0.8825 2.1600
sd(Data$Regulatory_quality)
## [1] 0.933464
Mode(Data$Regulatory_quality)
## [1] -0.11
## attr(,"freq")
## [1] 3
hist(Data$Regulatory_quality, col = mis.colores1(14), main = "Gráfico 14: Calidad regulatoria según países", xlab = "Calidad regulatoria", ylab = "Número de países" )
################################################################################
THE BREAKDOWN BY CATEGORY CONTINUES
# Against the number of infections at day 100
tab1=table(Data$`Ayuda Económica`,Data$Valor100)
chisq.test(tab1)
## Warning in chisq.test(tab1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tab1
## X-squared = 126, df = 125, p-value = 0.4581
aov(Data$Valor100~Data$`Ayuda Económica`)
## Call:
## aov(formula = Data$Valor100 ~ Data$`Ayuda Económica`)
##
## Terms:
## Data$`Ayuda Económica` Residuals
## Sum of Squares 0.07331 9.69036
## Deg. of Freedom 1 124
##
## Residual standard error: 0.2795497
## Estimated effects may be unbalanced
summary(aov(Data$Valor100~Data$`Ayuda Económica`))
## Df Sum Sq Mean Sq F value Pr(>F)
## Data$`Ayuda Económica` 1 0.073 0.07331 0.938 0.335
## Residuals 124 9.690 0.07815
TukeyHSD(aov(Data$Valor100~Data$`Ayuda Económica`))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Data$Valor100 ~ Data$`Ayuda Económica`)
##
## $`Data$`Ayuda Económica``
## diff lwr upr p adj
## Menos del 50% del sueldo-Sin apoyo -0.1053042 -0.3204977 0.1098893 0.3346532
ggplot(Data, aes(y = Valor100, x = `Ayuda Económica`,fill=factor(`Ayuda Económica`))) +
geom_boxplot()+ggtitle("Gráfico 15: Número de contagios según el tipo de Ayuda económica")+xlab("Apoyo económico")
# This is a categorical variable, so it calls for an ANOVA or a chi-squared test
# Against the number of infections at day 100
tabla=table(Data$infoalawk,Data$Valor100)
chisq.test(tabla)
## Warning in chisq.test(tabla): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tabla
## X-squared = 252, df = 250, p-value = 0.4526
aov(Data$Valor100~Data$infoalawk)
## Call:
## aov(formula = Data$Valor100 ~ Data$infoalawk)
##
## Terms:
## Data$infoalawk Residuals
## Sum of Squares 0.194237 9.569433
## Deg. of Freedom 2 123
##
## Residual standard error: 0.278927
## Estimated effects may be unbalanced
summary(aov(Data$Valor100~Data$infoalawk))
## Df Sum Sq Mean Sq F value Pr(>F)
## Data$infoalawk 2 0.194 0.09712 1.248 0.291
## Residuals 123 9.569 0.07780
TukeyHSD(aov(Data$Valor100~Data$infoalawk))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Data$Valor100 ~ Data$infoalawk)
##
## $`Data$infoalawk`
## diff lwr upr
## Campañas del gobierno-Ninguna 0.04696467 -0.1739541 0.26788343
## Campañas integrales-Ninguna -0.05757857 -0.2325748 0.11741770
## Campañas integrales-Campañas del gobierno -0.10454323 -0.2716130 0.06252651
## p adj
## Campañas del gobierno-Ninguna 0.8693544
## Campañas integrales-Ninguna 0.7156298
## Campañas integrales-Campañas del gobierno 0.3018519
ggplot(Data, aes(y = Valor100, x = infoalawk,fill=factor(infoalawk))) +
geom_boxplot()+ggtitle("Gráfico 16: Número de contagios según el tipo de campañas")+xlab("campañas informativas")
# Against the number of infections at day 100
cor.test(Data$Rigurosidad,Data$Valor100) # p-value 0.015 and cor -0.22
##
## Pearson's product-moment correlation
##
## data: Data$Rigurosidad and Data$Valor100
## t = -2.4657, df = 124, p-value = 0.01504
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.37684504 -0.04290302
## sample estimates:
## cor
## -0.2161877
plot(Valor100~Rigurosidad,data=Data, main="Gráfico 17: Número de contagios según el índice de Rigurosidad en medidas tempranas", xlab="Índice de rigurosidad en medidas tempranas", ylab="Número de contagios")
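For reference, cor.test() turns Pearson's r into a t statistic with n - 2 degrees of freedom; plugging in the values printed above (r ≈ -0.216, n = 126) recovers the reported t:

$$ t=\frac{r\sqrt{n-2}}{\sqrt{1-r^{2}}}=\frac{-0.216\,\sqrt{124}}{\sqrt{1-(-0.216)^{2}}}\approx-2.47 $$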
# Against the number of infections at day 100
cor.test(Data$Densidadpob,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Densidadpob and Data$Valor100
## t = 1.1244, df = 124, p-value = 0.263
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07577095 0.27061893
## sample estimates:
## cor
## 0.1004674
plot(Valor100~Densidadpob,data=Data, main="Gráfico 18: Número de contagios según la densidad poblacional", xlab="Densidad poblacional", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Poburbana,Data$Valor100) # p-value 1.2e-07 and cor 0.45
##
## Pearson's product-moment correlation
##
## data: Data$Poburbana and Data$Valor100
## t = 5.6256, df = 124, p-value = 1.167e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2996437 0.5800753
## sample estimates:
## cor
## 0.4509181
plot(Valor100~Poburbana,data=Data, main="Gráfico 19: Número de contagios según la población urbana de un país", xlab="Población urbana", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$HDI,Data$Valor100) # p-value 2.3e-05 and cor 0.37
##
## Pearson's product-moment correlation
##
## data: Data$HDI and Data$Valor100
## t = 4.4007, df = 124, p-value = 2.3e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2058616 0.5096775
## sample estimates:
## cor
## 0.3675349
plot(Valor100~HDI,data=Data, main="Gráfico 20: Número de contagios según el IDH de un país", xlab="IDH", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$PPP_2018,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$PPP_2018 and Data$Valor100
## t = 6.7992, df = 124, p-value = 3.95e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3809398 0.6378891
## sample estimates:
## cor
## 0.5211247
plot(Valor100~PPP_2018,data=Data, main="Gráfico 21: Número de contagios según el PBI per cápita (US$ a precios actuales)", xlab="PBI per cápita (US$ a precios actuales)", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Tasadesempleo,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Tasadesempleo and Data$Valor100
## t = -1.5076, df = 124, p-value = 0.1342
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.30198104 0.04172577
## sample estimates:
## cor
## -0.1341603
plot(Valor100~Tasadesempleo,data=Data, main="Gráfico 22: Número de contagios según el Desempleo en un país", xlab="Tasa de desempleo", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Political_sta,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Political_sta and Data$Valor100
## t = 3.2977, df = 124, p-value = 0.001272
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1147402 0.4371449
## sample estimates:
## cor
## 0.2839486
plot(Valor100~Political_sta,data=Data, main="Gráfico 23: Número de contagios según la estabilidad política en un país", xlab="Estabilidad política", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Ruleoflaw,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Ruleoflaw and Data$Valor100
## t = 4.1167, df = 124, p-value = 6.961e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1829388 0.4918286
## sample estimates:
## cor
## 0.3467507
plot(Valor100~Ruleoflaw,data=Data, main="Gráfico 24: Número de contagios según el imperio de la ley en un país", xlab="Imperio de la ley", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$GEE,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$GEE and Data$Valor100
## t = 3.9627, df = 124, p-value = 0.0001243
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1703449 0.4819106
## sample estimates:
## cor
## 0.3352628
plot(Valor100~GEE,data=Data, main="Gráfico 25: Número de contagios según el Índice de Efectividad de la Gobernanza en un país", xlab="Índice de Efectividad de la Gobernanza", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Voice_acco,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Voice_acco and Data$Valor100
## t = 1.1472, df = 124, p-value = 0.2535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0737526 0.2724990
## sample estimates:
## cor
## 0.1024762
plot(Valor100~Voice_acco,data=Data, main="Gráfico 26: Número de contagios según la voz y rendición de cuentas de un país", xlab="Voz y rendición de cuentas", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Control_co,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Control_co and Data$Valor100
## t = 3.6697, df = 124, p-value = 0.0003591
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1460846 0.4625774
## sample estimates:
## cor
## 0.3129942
plot(Valor100~Control_co,data=Data, main="Gráfico 27: Número de contagios según el control de la corrupción en un país", xlab="Control de la corrupción", ylab="Número de contagios")
# Against the number of infections at day 100
cor.test(Data$Regulatory_quality,Data$Valor100)
##
## Pearson's product-moment correlation
##
## data: Data$Regulatory_quality and Data$Valor100
## t = 4.2319, df = 124, p-value = 4.468e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1922861 0.4991384
## sample estimates:
## cor
## 0.3552454
plot(Valor100~Regulatory_quality,data=Data, main="Gráfico 28: Número de contagios según la calidad regulatoria en un país", xlab="Calidad regulatoria", ylab="Número de contagios")
names(Data)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
Choclo<- Data[,c(1:9, 14:18)]
We generate the correlation matrix to identify which variables in our Choclo are correlated. We look at the significant correlations: if you can see correlated blocks, there is hope for a good factor analysis.
# here it is:
library(polycor)
Matrixcor=polycor::hetcor(Choclo)$correlations
ggcorrplot(Matrixcor)
ggcorrplot(Matrixcor,
p.mat = cor_pmat(Matrixcor),
insig = "blank")
First, we check whether the data allow factoring:
KMO(Matrixcor)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = Matrixcor)
## Overall MSA = 0.73
## MSA for each item =
## Political_sta Voice_acco Ruleoflaw Control_co
## 0.92 0.75 0.86 0.70
## Regulatory_quality Tasadesempleo Densidadpob Ayuda Económica
## 0.75 0.83 0.33 0.16
## HDI Poburbana infoalawk Rigurosidad
## 0.74 0.82 0.48 0.66
## GEE PPP_2018
## 0.78 0.76
# Problem: several items fall below 0.6. Why is this happening? There are variables that may be pulling the solution in their own direction
#### Check whether the correlation matrix is adequate. For that, we have two functions:
cortest.bartlett(Matrixcor,n=nrow(Choclo))$p.value>0.05
## [1] FALSE
is.singular.matrix(Matrixcor)
## [1] FALSE
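As a side note, the three checks we run before factoring (overall KMO MSA of at least 0.6, a significant Bartlett test, and a non-singular matrix) can be wrapped into one helper. A minimal sketch; check_factorable is our own illustrative name, not a function from any package:
# Illustrative helper: TRUE when the matrix looks factorable by our three checks
check_factorable = function(corMatrix, n){
  kmo_ok      = psych::KMO(corMatrix)$MSA >= 0.6                          # overall sampling adequacy
  bartlett_ok = psych::cortest.bartlett(corMatrix, n = n)$p.value < 0.05  # not an identity matrix
  nonsingular = !matrixcalc::is.singular.matrix(corMatrix)
  kmo_ok & bartlett_ok & nonsingular
}
check_factorable(Matrixcor, n = nrow(Choclo))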
We determine how many factors or latent variables we could reduce the data to. We look at the suggested number and also at the plot.
Choclo$Tasadesempleo = as.numeric(Choclo$Tasadesempleo)
Choclo$`Ayuda Económica` = as.numeric(Choclo$`Ayuda Económica`) # as.numeric() on an ordered factor uses the underlying level codes
Choclo$infoalawk = as.numeric(Choclo$infoalawk)
str(Choclo)
## 'data.frame': 126 obs. of 14 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality: num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Tasadesempleo : num 24 7 14 4 2 8 6 6 5 7 ...
## $ Densidadpob : num 56.9 24.7 104.6 163.8 135.6 ...
## $ Ayuda Económica : num 24 7 14 4 2 8 6 6 5 7 ...
## $ HDI : num 0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
## $ Poburbana : num 25.8 66.2 61.2 88 86.8 ...
## $ infoalawk : num 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
## $ PPP_2018 : num 524 3290 5284 41793 43839 ...
library(parameters)
library(nFactors)
library(see)
matrixcalc::is.singular.matrix(Matrixcor)
## [1] FALSE
sugerencia=parameters::n_factors(Matrixcor)
##
## These indices are only valid with a principal component solution.
## ...................... So, only positive eugenvalues are permitted.
plot(sugerencia)
We request the number of factors. Watch whether any warning messages appear.
resfax <- fa(Choclo,nfactors = 2,cor = 'mixed',rotate = "varimax",fm="minres") # pay attention to the fit tests
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## In smc, smcs < 0 were set to .0
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## In smc, smcs < 0 were set to .0
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## In smc, smcs < 0 were set to .0
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
We look at the initial result
print(resfax$loadings)
##
## Loadings:
## MR1 MR2
## Political_sta 0.776 -0.155
## Voice_acco 0.761
## Ruleoflaw 0.972 -0.131
## Control_co 0.940 -0.109
## Regulatory_quality 0.935 -0.181
## Tasadesempleo 0.986
## Densidadpob 0.159
## Ayuda Económica 0.986
## HDI 0.804 -0.286
## Poburbana 0.585 -0.172
## infoalawk -0.135 0.164
## Rigurosidad -0.389 0.202
## GEE 0.951 -0.223
## PPP_2018 0.814 -0.189
##
## MR1 MR2
## SS loadings 6.651 2.307
## Proportion Var 0.475 0.165
## Cumulative Var 0.475 0.640
We look at the improved result: when each variable goes to a single factor, we have simple structure.
print(resfax$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## Political_sta 0.776
## Voice_acco 0.761
## Ruleoflaw 0.972
## Control_co 0.940
## Regulatory_quality 0.935
## Tasadesempleo 0.986
## Densidadpob
## Ayuda Económica 0.986
## HDI 0.804
## Poburbana 0.585
## infoalawk
## Rigurosidad
## GEE 0.951
## PPP_2018 0.814
##
## MR1 MR2
## SS loadings 6.651 2.307
## Proportion Var 0.475 0.165
## Cumulative Var 0.475 0.640
fa.diagram(resfax)
(resfax)
## Factor Analysis using method = minres
## Call: fa(r = Choclo, nfactors = 2, rotate = "varimax", fm = "minres",
## cor = "mixed")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## Political_sta 0.78 -0.15 0.626 0.374 1.1
## Voice_acco 0.76 0.02 0.580 0.420 1.0
## Ruleoflaw 0.97 -0.13 0.962 0.038 1.0
## Control_co 0.94 -0.11 0.896 0.104 1.0
## Regulatory_quality 0.93 -0.18 0.907 0.093 1.1
## Tasadesempleo -0.09 0.99 0.981 0.019 1.0
## Densidadpob 0.16 -0.10 0.035 0.965 1.7
## Ayuda Económica -0.09 0.99 0.981 0.019 1.0
## HDI 0.80 -0.29 0.729 0.271 1.2
## Poburbana 0.59 -0.17 0.372 0.628 1.2
## infoalawk -0.14 0.16 0.045 0.955 1.9
## Rigurosidad -0.39 0.20 0.192 0.808 1.5
## GEE 0.95 -0.22 0.953 0.047 1.1
## PPP_2018 0.81 -0.19 0.699 0.301 1.1
##
## MR1 MR2
## SS loadings 6.65 2.31
## Proportion Var 0.48 0.16
## Cumulative Var 0.48 0.64
## Proportion Explained 0.74 0.26
## Cumulative Proportion 0.74 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 91 and the objective function was 36.16 with Chi Square of 4321.29
## The degrees of freedom for the model are 64 and the objective function was 20.08
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic number of observations is 126 with the empirical chi square 100.59 with prob < 0.0024
## The total number of observations was 126 with Likelihood Chi Square = 2372.68 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.215
## RMSEA index = 0.535 and the 90 % confidence intervals are 0.519 0.556
## BIC = 2063.16
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.99 0.99
## Multiple R square of scores with factors 0.99 0.99
## Minimum correlation of possible factor scores 0.98 0.97
resfax$crms
## [1] 0.07897521
resfax$RMSEA
## RMSEA lower upper confidence
## 0.5350055 0.5187986 0.5558288 0.9000000
resfax$TLI
## [1] 0.2150672
sort(resfax$communality)
## Densidadpob infoalawk Rigurosidad Poburbana
## 0.03494687 0.04508238 0.19172250 0.37237009
## Voice_acco Political_sta PPP_2018 HDI
## 0.58000207 0.62582310 0.69873428 0.72855678
## Control_co Regulatory_quality GEE Ruleoflaw
## 0.89633355 0.90696984 0.95343952 0.96160215
## Tasadesempleo Ayuda Económica
## 0.98114563 0.98114563
sort(resfax$complexity)
## Voice_acco Tasadesempleo Ayuda Económica Control_co
## 1.001057 1.016493 1.016493 1.026979
## Ruleoflaw Regulatory_quality Political_sta PPP_2018
## 1.036554 1.074820 1.079650 1.107067
## GEE Poburbana HDI Rigurosidad
## 1.109477 1.171243 1.249030 1.503459
## Densidadpob infoalawk
## 1.677688 1.929564
What names would you give them?
resfa_casosx<-as.data.frame(resfax$scores)
head(resfa_casosx)
## MR1 MR2
## Afghanistan -1.5525617 1.1580957
## Angola -1.2415497 -0.4539761
## Albania -0.1147450 0.3788696
## Andorra 1.4792339 -0.6673443
## United Arab Emirates 0.9684772 -0.8986776
## Argentina -0.3721714 -0.2890792
summary(resfa_casosx)
## MR1 MR2
## Min. :-2.0414 Min. :-1.1966
## 1st Qu.:-0.7337 1st Qu.:-0.5330
## Median :-0.1731 Median :-0.2425
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.6428 3rd Qu.: 0.1613
## Max. : 2.0550 Max. : 6.3915
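If we want to reuse these scores later, they can be joined back to Data by country (a sketch; Data_scores is a new illustrative object, so the Data used in the next sections is left untouched):
# Join the factor scores to Data by row name (countries); Row.names becomes a column
Data_scores = merge(Data, resfa_casosx, by = "row.names")
head(Data_scores[, c("Row.names", "MR1", "MR2")])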
STEP BY STEP
Let's compute the correlation matrix:
names(Data)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
theData = Data[, c(1:5,8,15:17)]
table(theData$`Ayuda Económica`)
##
## Sin apoyo Menos del 50% del sueldo
## 119 7
str(theData)
## 'data.frame': 126 obs. of 9 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality: num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Ayuda Económica : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
names(theData)=c("Political stability", "Voice and accountability", "Rule of law", "Control Corruption", "Regulatory Quality", "Ayuda economica", "Campañas informativas", "Rigurosidad", "GEE")
We analyze
lapiz=polycor::hetcor(theData)$correlations
We explore the correlations:
ggcorrplot(lapiz)
# evaluating significance
ggcorrplot(lapiz,
p.mat = cor_pmat(lapiz),
insig = "blank",
title = "Gráfico 1: Matriz de correlación")
We check whether the data can be factored
psych::KMO(lapiz)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = lapiz)
## Overall MSA = 0.83
## MSA for each item =
## Political stability Voice and accountability Rule of law
## 0.90 0.90 0.85
## Control Corruption Regulatory Quality Ayuda economica
## 0.83 0.82 0.35
## Campañas informativas Rigurosidad GEE
## 0.53 0.74 0.86
We check whether the correlation matrix is adequate
cortest.bartlett(lapiz,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(lapiz)
## [1] FALSE
We determine how many factors or latent variables we could reduce the data to
theData$`Ayuda economica` = as.numeric(theData$`Ayuda economica`)
theData$`Campañas informativas` = as.numeric(theData$`Campañas informativas`)
fa.parallel(theData, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
WE REDUCE TO A SMALLER NUMBER OF FACTORS
Initial result:
mandarina <- fa(theData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## mixed.cor is deprecated, please use mixedCor.
print(mandarina$loadings)
##
## Loadings:
## MR1 MR2
## Political stability 0.840
## Voice and accountability 0.792
## Rule of law 0.963 -0.217
## Control Corruption 0.946 -0.135
## Regulatory Quality 0.926 -0.179
## Ayuda economica 0.482
## Campañas informativas 0.481
## Rigurosidad -0.242 0.979
## GEE 0.931 -0.256
##
## MR1 MR2
## SS loadings 4.942 1.585
## Proportion Var 0.549 0.176
## Cumulative Var 0.549 0.725
Visual result
fa.diagram(mandarina, main = c("Gráfico 1: Árbol de factorización de la dimensión de gobernanza y medidas tempranas"))
We evaluate the result obtained: is the corrected root mean square residual (crms) close to zero?
mandarina$crms
## [1] 0.03658976
Is the root mean square error of approximation (RMSEA) below 0.05?
mandarina$RMSEA
## RMSEA lower upper confidence
## 0.1667304 0.1324592 0.2044561 0.9000000
Is the Tucker-Lewis index above 0.9?
mandarina$TLI
## [1] 0.8956513
Which variables contributed most to the factors?
sort(mandarina$communality)
## Ayuda economica Campañas informativas Voice and accountability
## 0.2325703 0.2357622 0.6277258
## Political stability Regulatory Quality Control Corruption
## 0.7061226 0.8892774 0.9138902
## GEE Rule of law Rigurosidad
## 0.9314220 0.9734708 1.0163076
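Since varimax is an orthogonal rotation, communality is the row sum of squared loadings, so the values above can be reproduced directly (a sketch). Note that Rigurosidad exceeds 1: this is the ultra-Heywood case flagged in the warning earlier.
sort(rowSums(unclass(mandarina$loadings)^2)) # should match the communalities above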
Which variables contribute to more than one factor? # ideally complexity should be close to 1
sort(mandarina$complexity)
## Ayuda economica Voice and accountability Political stability
## 1.000263 1.000346 1.001687
## Campañas informativas Control Corruption Regulatory Quality
## 1.037678 1.040483 1.074851
## Rule of law Rigurosidad GEE
## 1.101283 1.121989 1.149998
factorial_casos<-as.data.frame(mandarina$scores) # factor scores per case
head(factorial_casos)
## MR1 MR2
## Afghanistan -1.8887443 -0.4077092
## Angola -1.1601934 -0.2620495
## Albania 0.0368565 1.7407112
## Andorra 1.4049975 -1.3051795
## United Arab Emirates 0.6920686 -1.2259458
## Argentina -0.5383236 -0.8125784
summary(factorial_casos)
## MR1 MR2
## Min. :-2.0235 Min. :-1.7756
## 1st Qu.:-0.6876 1st Qu.:-0.7682
## Median :-0.2155 Median :-0.2225
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7355 3rd Qu.: 0.6333
## Max. : 1.9438 Max. : 3.2592
We compute the correlation matrix:
#We create a dataset with the variables selected by theory
demo = Data
names(demo)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
demo = (Data[, c(10:13, 18, 6)])
#We convert the unemployment rate into an employment rate (100 - unemployment)
demo$empleo = 100 - (demo$Tasadesempleo)
head(demo)
## EXPECTATIVAVIDA EXPECTCOLE YEARS_SCHOOLING
## Afghanistan 64.49 10.14 3.93
## Angola 60.78 11.78 5.13
## Albania 78.46 15.23 10.05
## Andorra 81.79 13.30 10.16
## United Arab Emirates 77.81 13.64 10.95
## Argentina 76.52 17.64 10.56
## GNI_GROSSNATIONALINCOME PPP_2018 Tasadesempleo empleo
## Afghanistan 1745.67 524.16 24 76
## Angola 5554.70 3289.65 7 93
## Albania 12299.80 5284.38 14 86
## Andorra 48640.89 41793.06 4 96
## United Arab Emirates 66911.66 43839.36 2 98
## Argentina 17611.22 11683.95 8 92
demo$Tasadesempleo = NULL
We analyze
pinguino=polycor::hetcor(demo)$correlations
We explore the correlations
ggcorrplot(pinguino)
# evaluating significance
ggcorrplot(pinguino,
p.mat = cor_pmat(pinguino),
insig = "blank",
title = "Figure 1: Correlation matrix")
We check whether the data can be factored
psych::KMO(pinguino)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = pinguino)
## Overall MSA = 0.83
## MSA for each item =
## EXPECTATIVAVIDA EXPECTCOLE YEARS_SCHOOLING
## 0.89 0.84 0.87
## GNI_GROSSNATIONALINCOME PPP_2018 empleo
## 0.76 0.77 0.93
We check whether the correlation matrix is adequate
cortest.bartlett(pinguino,n=nrow(demo))$p.value>0.05 # FALSE: p <= 0.05, so the matrix is not an identity matrix and is adequate for factoring
## [1] FALSE
library(matrixcalc)
is.singular.matrix(pinguino)
## [1] FALSE
We determine how many factors or latent variables the data could be reduced to
fa.parallel(demo, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
WE REDUCE TO A SMALLER NUMBER OF FACTORS
Initial result (note: parallel analysis suggested two factors, but a single factor is fit here):
alfalfa <- fa(demo,nfactors = 1,cor = 'mixed',rotate ="varimax",fm="minres")
## mixed.cor is deprecated, please use mixedCor.
print(alfalfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1
## EXPECTATIVAVIDA 0.864
## EXPECTCOLE 0.855
## YEARS_SCHOOLING 0.862
## GNI_GROSSNATIONALINCOME 0.839
## PPP_2018 0.825
## empleo
##
## MR1
## SS loadings 3.731
## Proportion Var 0.622
Visual result
fa.diagram(alfalfa, main = c("Figure 2: Factorization tree of the first model"))
Evaluating the result obtained: is the corrected root mean square residual close to zero?
alfalfa$crms
## [1] 0.09378219
Is the root mean square error of approximation below 0.05?
alfalfa$RMSEA
## RMSEA lower upper confidence
## 0.2930635 0.2456668 0.3457595 0.9000000
Is the Tucker-Lewis index above 0.9?
alfalfa$TLI
## [1] 0.7117268
Which variables contributed most to the factors?
sort(alfalfa$communality)
## empleo PPP_2018 GNI_GROSSNATIONALINCOME
## 0.1260794 0.6811339 0.7043395
## EXPECTCOLE YEARS_SCHOOLING EXPECTATIVAVIDA
## 0.7308917 0.7424977 0.7462551
Which variables contribute to more than one factor? # ideally complexity should be close to 1
sort(alfalfa$complexity)
## EXPECTATIVAVIDA EXPECTCOLE PPP_2018
## 1 1 1
## YEARS_SCHOOLING GNI_GROSSNATIONALINCOME empleo
## 1 1 1
factorial_casos<-as.data.frame(alfalfa$scores) # factor scores per case
head(factorial_casos)
## MR1
## Afghanistan -1.2751293
## Angola -1.0796263
## Albania 0.1860176
## Andorra 0.8835804
## United Arab Emirates 1.0710668
## Argentina 0.4568132
summary(factorial_casos)
## MR1
## Min. :-1.77527
## 1st Qu.:-0.77311
## Median :-0.02028
## Mean : 0.00000
## 3rd Qu.: 0.67733
## Max. : 1.94743
DataDer=cbind(Data[1],as.data.frame(resfax$scores)) # resfax: factor solution estimated in an earlier section
Data$experimento=normalize(DataDer$MR1,
method = "range",
margin=2, # by column
range = c(0, 10))
Data$experimenton=normalize(DataDer$MR2,
method = "range",
margin=2, # by column
range = c(0, 10))
AJA=cbind(Data[1],as.data.frame(mandarina$scores))
Data$Gobernanza= normalize(AJA$MR1,
method = "range",
margin=2, # by column
range = c(0, 10))
Data$Medidas_tempranas=normalize(AJA$MR2,
method = "range",
margin=2, # by column
range = c(0, 10))
EJE=cbind(Data[1],as.data.frame(alfalfa$scores))
Data$estructural= normalize(EJE$MR1,
method = "range",
margin=2, # by column
range = c(0, 10))
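For reference, method = "range" in BBmisc::normalize is a linear min-max rescale; a minimal base-R sketch of the same mapping:
reescala = function(x, lo = 0, hi = 10) lo + (hi - lo)*(x - min(x))/(max(x) - min(x)) # min -> lo, max -> hi
all.equal(reescala(AJA$MR1), Data$Gobernanza) # should be TRUE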
STEP BY STEP HERE AS WELL
We create a data frame with the variables to be used in the regression, selected by theory
data_regre=Data
names(data_regre)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
## [21] "experimento" "experimenton"
## [23] "Gobernanza" "Medidas_tempranas"
## [25] "estructural"
data_regre$pobla = NULL
str(data_regre)
## 'data.frame': 126 obs. of 24 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality : num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Tasadesempleo : int 24 7 14 4 2 8 6 6 5 7 ...
## $ Densidadpob : num 56.9 24.7 104.6 163.8 135.6 ...
## $ Ayuda Económica : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ HDI : num 0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 78.5 81.8 77.8 ...
## $ EXPECTCOLE : num 10.1 11.8 15.2 13.3 13.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.05 10.16 10.95 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 12300 48641 66912 ...
## $ Poburbana : num 25.8 66.2 61.2 88 86.8 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
## $ PPP_2018 : num 524 3290 5284 41793 43839 ...
## $ Valor100 : num 0.043397 0.000814 0.058581 1.104457 0.166214 ...
## $ experimento : num 1.19 1.95 4.7 8.59 7.35 ...
## $ experimenton : num 3.103 0.979 2.076 0.697 0.393 ...
## $ Gobernanza : num 0.34 2.18 5.19 8.64 6.84 ...
## $ Medidas_tempranas : num 2.717 3.006 6.984 0.934 1.092 ...
## $ estructural : num 1.34 1.87 5.27 7.14 7.65 ...
## - attr(*, "na.action")= 'omit' Named int [1:3] 39 107 112
## ..- attr(*, "names")= chr [1:3] "39" "116" "121"
names(data_regre)=c("Political stability", "Voice and accountability", "Rule of law", "Control Corruption", "Regulatory Quality", "Tasa de desempleo", "Densidad de la poblacion", "Ayuda economica", "IDH", "Expectativa de vida", "Expectativa de años de escolaridad", "Promedio de años de escolaridad", "Renta Nacional", "Poblacion urbana", "Campañas informativas", "Rigurosidad", "GEE", "PBI per capita", "Contagiados", "Experimento", "Experimenton","Gobernanza", "Medidas tempranas", "Estructural")
We run the regressions
#all variables (HDI as a variable)
Hipotesis = formula(Contagiados~data_regre$`Political stability` + data_regre$`Voice and accountability` + data_regre$`Rule of law` + data_regre$`Control Corruption` + data_regre$`Regulatory Quality` + data_regre$`Tasa de desempleo` + data_regre$`Densidad de la poblacion` + data_regre$`Ayuda economica` + data_regre$IDH + data_regre$`Poblacion urbana` + data_regre$`Campañas informativas` + data_regre$Rigurosidad + data_regre$GEE + data_regre$`PBI per capita`)
regre = lm(Hipotesis, data=data_regre)
summary(regre)
##
## Call:
## lm(formula = Hipotesis, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39800 -0.10782 -0.02222 0.06540 1.49170
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.314e-02 2.152e-01 0.061 0.95140
## data_regre$`Political stability` 8.115e-02 4.149e-02 1.956 0.05301
## data_regre$`Voice and accountability` -1.329e-01 4.153e-02 -3.201 0.00179
## data_regre$`Rule of law` 2.462e-02 1.006e-01 0.245 0.80710
## data_regre$`Control Corruption` -5.884e-02 7.731e-02 -0.761 0.44824
## data_regre$`Regulatory Quality` 1.428e-01 7.817e-02 1.827 0.07044
## data_regre$`Tasa de desempleo` 1.141e-03 2.191e-03 0.521 0.60371
## data_regre$`Densidad de la poblacion` -3.532e-05 3.052e-05 -1.157 0.24958
## data_regre$`Ayuda economica`.L -1.605e-02 6.683e-02 -0.240 0.81062
## data_regre$IDH -2.625e-01 3.261e-01 -0.805 0.42260
## data_regre$`Poblacion urbana` 3.716e-03 1.427e-03 2.605 0.01046
## data_regre$`Campañas informativas`.L 8.700e-03 4.901e-02 0.178 0.85942
## data_regre$`Campañas informativas`.Q -5.581e-02 5.112e-02 -1.092 0.27729
## data_regre$Rigurosidad -7.098e-04 1.050e-03 -0.676 0.50026
## data_regre$GEE -1.204e-01 9.775e-02 -1.231 0.22086
## data_regre$`PBI per capita` 7.446e-06 1.793e-06 4.154 6.49e-05
##
## (Intercept)
## data_regre$`Political stability` .
## data_regre$`Voice and accountability` **
## data_regre$`Rule of law`
## data_regre$`Control Corruption`
## data_regre$`Regulatory Quality` .
## data_regre$`Tasa de desempleo`
## data_regre$`Densidad de la poblacion`
## data_regre$`Ayuda economica`.L
## data_regre$IDH
## data_regre$`Poblacion urbana` *
## data_regre$`Campañas informativas`.L
## data_regre$`Campañas informativas`.Q
## data_regre$Rigurosidad
## data_regre$GEE
## data_regre$`PBI per capita` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2262 on 110 degrees of freedom
## Multiple R-squared: 0.4233, Adjusted R-squared: 0.3447
## F-statistic: 5.383 on 15 and 110 DF, p-value: 5.273e-08
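A side note on style (a sketch; the fit itself is unchanged): writing data_regre$... inside the formula means those terms bypass the data= argument. Referencing the backticked column names directly is equivalent here and plays better with predict() or refits on subsets:
Hipotesis2 = formula(Contagiados ~ `Political stability` + `Voice and accountability` + `Rule of law` + `Control Corruption` + `Regulatory Quality` + `Tasa de desempleo` + `Densidad de la poblacion` + `Ayuda economica` + IDH + `Poblacion urbana` + `Campañas informativas` + Rigurosidad + GEE + `PBI per capita`)
summary(lm(Hipotesis2, data = data_regre)) # same coefficients as regre above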
aca = formula(Contagiados~data_regre$`Political stability` + data_regre$`Voice and accountability` + data_regre$`Rule of law` + data_regre$`Control Corruption` + data_regre$`Regulatory Quality` + data_regre$`Tasa de desempleo` + data_regre$`Densidad de la poblacion` + data_regre$`Ayuda economica` + data_regre$`Expectativa de vida` + data_regre$`Expectativa de años de escolaridad` + data_regre$`Promedio de años de escolaridad` + data_regre$`Renta Nacional` + data_regre$`Poblacion urbana` + data_regre$`Campañas informativas` + data_regre$Rigurosidad + data_regre$GEE + data_regre$`PBI per capita`)
ece = lm(aca, data= data_regre)
summary(ece)
##
## Call:
## lm(formula = aca, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57806 -0.08527 -0.01920 0.05624 0.79078
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -1.339e-01 2.771e-01 -0.483
## data_regre$`Political stability` 5.262e-02 3.454e-02 1.524
## data_regre$`Voice and accountability` 3.875e-03 3.925e-02 0.099
## data_regre$`Rule of law` -1.193e-02 8.291e-02 -0.144
## data_regre$`Control Corruption` -4.560e-02 6.411e-02 -0.711
## data_regre$`Regulatory Quality` 7.356e-02 6.550e-02 1.123
## data_regre$`Tasa de desempleo` 5.430e-04 1.813e-03 0.300
## data_regre$`Densidad de la poblacion` -6.472e-05 2.536e-05 -2.552
## data_regre$`Ayuda economica`.L -1.427e-02 5.518e-02 -0.259
## data_regre$`Expectativa de vida` 6.763e-03 4.290e-03 1.577
## data_regre$`Expectativa de años de escolaridad` -2.772e-02 1.238e-02 -2.238
## data_regre$`Promedio de años de escolaridad` -2.174e-02 1.132e-02 -1.920
## data_regre$`Renta Nacional` 1.618e-05 2.500e-06 6.471
## data_regre$`Poblacion urbana` 1.582e-03 1.173e-03 1.348
## data_regre$`Campañas informativas`.L 2.024e-02 4.041e-02 0.501
## data_regre$`Campañas informativas`.Q -4.140e-02 4.223e-02 -0.980
## data_regre$Rigurosidad -8.623e-04 8.721e-04 -0.989
## data_regre$GEE -1.197e-01 7.849e-02 -1.525
## data_regre$`PBI per capita` -2.126e-06 2.080e-06 -1.022
## Pr(>|t|)
## (Intercept) 0.6300
## data_regre$`Political stability` 0.1305
## data_regre$`Voice and accountability` 0.9215
## data_regre$`Rule of law` 0.8859
## data_regre$`Control Corruption` 0.4785
## data_regre$`Regulatory Quality` 0.2639
## data_regre$`Tasa de desempleo` 0.7651
## data_regre$`Densidad de la poblacion` 0.0121 *
## data_regre$`Ayuda economica`.L 0.7964
## data_regre$`Expectativa de vida` 0.1179
## data_regre$`Expectativa de años de escolaridad` 0.0273 *
## data_regre$`Promedio de años de escolaridad` 0.0575 .
## data_regre$`Renta Nacional` 3.01e-09 ***
## data_regre$`Poblacion urbana` 0.1804
## data_regre$`Campañas informativas`.L 0.6175
## data_regre$`Campañas informativas`.Q 0.3291
## data_regre$Rigurosidad 0.3251
## data_regre$GEE 0.1303
## data_regre$`PBI per capita` 0.3091
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.186 on 107 degrees of freedom
## Multiple R-squared: 0.6208, Adjusted R-squared: 0.557
## F-statistic: 9.732 on 18 and 107 DF, p-value: 2.248e-15
#Governance, early measures, structural, urban population, and population density
efe= formula(Contagiados~ + data_regre$Gobernanza + data_regre$`Medidas tempranas` + data_regre$Estructural + data_regre$`Poblacion urbana` + data_regre$`Densidad de la poblacion`)
afa = lm(efe, data = data_regre)
summary(afa)
##
## Call:
## lm(formula = efe, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32305 -0.12371 -0.02479 0.05341 2.01556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.566e-01 8.444e-02 -1.854 0.0661 .
## data_regre$Gobernanza -1.067e-02 1.669e-02 -0.639 0.5240
## data_regre$`Medidas tempranas` -4.693e-03 1.172e-02 -0.400 0.6895
## data_regre$Estructural 3.691e-02 1.992e-02 1.853 0.0664 .
## data_regre$`Poblacion urbana` 3.128e-03 1.480e-03 2.113 0.0367 *
## data_regre$`Densidad de la poblacion` 5.958e-06 3.080e-05 0.193 0.8470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2482 on 120 degrees of freedom
## Multiple R-squared: 0.2426, Adjusted R-squared: 0.2111
## F-statistic: 7.688 on 5 and 120 DF, p-value: 2.667e-06
#Governance, population density, national income, expected years of schooling (the chosen model)
MINARISE=formula(Contagiados~data_regre$Gobernanza+data_regre$`Renta Nacional` +data_regre$`Expectativa de años de escolaridad` + data_regre$`Densidad de la poblacion`)
MINARISEM=lm(MINARISE,data=data_regre)
summary(MINARISEM)
##
## Call:
## lm(formula = MINARISE, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63339 -0.08434 -0.02316 0.04935 0.93377
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.548e-01 9.372e-02 3.786
## data_regre$Gobernanza -2.619e-02 1.144e-02 -2.288
## data_regre$`Renta Nacional` 1.488e-05 1.328e-06 11.205
## data_regre$`Expectativa de años de escolaridad` -2.666e-02 8.724e-03 -3.055
## data_regre$`Densidad de la poblacion` -6.015e-05 2.438e-05 -2.467
## Pr(>|t|)
## (Intercept) 0.00024 ***
## data_regre$Gobernanza 0.02387 *
## data_regre$`Renta Nacional` < 2e-16 ***
## data_regre$`Expectativa de años de escolaridad` 0.00277 **
## data_regre$`Densidad de la poblacion` 0.01502 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1886 on 121 degrees of freedom
## Multiple R-squared: 0.5593, Adjusted R-squared: 0.5447
## F-statistic: 38.38 on 4 and 121 DF, p-value: < 2.2e-16
#Governance + national income + expected years of schooling + urban population
MINARISEX=formula(Contagiados~data_regre$Gobernanza+data_regre$`Renta Nacional` +data_regre$`Expectativa de años de escolaridad`+ data_regre$`Poblacion urbana`)
MINARISEME=lm(MINARISEX,data=data_regre)
summary(MINARISEME)
##
## Call:
## lm(formula = MINARISEX, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58747 -0.07887 -0.01999 0.04565 1.03327
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 2.980e-01 9.494e-02 3.139
## data_regre$Gobernanza -2.327e-02 1.175e-02 -1.979
## data_regre$`Renta Nacional` 1.288e-05 1.444e-06 8.921
## data_regre$`Expectativa de años de escolaridad` -3.024e-02 9.619e-03 -3.144
## data_regre$`Poblacion urbana` 1.906e-03 1.152e-03 1.655
## Pr(>|t|)
## (Intercept) 0.00213 **
## data_regre$Gobernanza 0.05004 .
## data_regre$`Renta Nacional` 5.95e-15 ***
## data_regre$`Expectativa de años de escolaridad` 0.00210 **
## data_regre$`Poblacion urbana` 0.10045
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1911 on 121 degrees of freedom
## Multiple R-squared: 0.5473, Adjusted R-squared: 0.5324
## F-statistic: 36.58 on 4 and 121 DF, p-value: < 2.2e-16
#Governance, population density, national income, expected years of schooling, urban population
MINA=formula(Contagiados~data_regre$Gobernanza+data_regre$`Renta Nacional` +data_regre$`Expectativa de años de escolaridad` + data_regre$`Densidad de la poblacion` + data_regre$`Poblacion urbana`)
GIRAIS=lm(MINA,data=data_regre)
summary(GIRAIS)
##
## Call:
## lm(formula = MINA, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63557 -0.08094 -0.01957 0.05382 0.94681
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.322e-01 9.405e-02 3.532
## data_regre$Gobernanza -2.309e-02 1.152e-02 -2.005
## data_regre$`Renta Nacional` 1.382e-05 1.466e-06 9.432
## data_regre$`Expectativa de años de escolaridad` -3.309e-02 9.495e-03 -3.485
## data_regre$`Densidad de la poblacion` -5.959e-05 2.421e-05 -2.461
## data_regre$`Poblacion urbana` 1.867e-03 1.128e-03 1.655
## Pr(>|t|)
## (Intercept) 0.000586 ***
## data_regre$Gobernanza 0.047251 *
## data_regre$`Renta Nacional` 3.87e-16 ***
## data_regre$`Expectativa de años de escolaridad` 0.000688 ***
## data_regre$`Densidad de la poblacion` 0.015264 *
## data_regre$`Poblacion urbana` 0.100528
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1872 on 120 degrees of freedom
## Multiple R-squared: 0.5691, Adjusted R-squared: 0.5511
## F-statistic: 31.7 on 5 and 120 DF, p-value: < 2.2e-16
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
Anovita=anova(MINARISEM, MINARISEME, GIRAIS)
stargazer(Anovita,type = 'text',summary = F,title = "Analysis of Variance Table")
##
## Analysis of Variance Table
## =========================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -----------------------------------------
## 1 121 4.303
## 2 121 4.420 0 -0.116
## 3 120 4.207 1 0.212 6.058 0.015
## -----------------------------------------
Within this segment we tried out the alternatives that were significant in the previous models. The first result (MINARISEM) was kept as the chosen model.
stargazer(MINARISEM, afa, regre, ece, type='text')
##
## ===================================================================================================================================
## Dependent variable:
## ----------------------------------------------------------------------------------------------
## Contagiados
## (1) (2) (3) (4)
## -----------------------------------------------------------------------------------------------------------------------------------
## Gobernanza -0.026** -0.011
## (0.011) (0.017)
##
## `Renta Nacional` 0.00001*** 0.00002***
## (0.00000) (0.00000)
##
## `Expectativa de años de escolaridad` -0.027*** -0.028**
## (0.009) (0.012)
##
## `Medidas tempranas` -0.005
## (0.012)
##
## Estructural 0.037*
## (0.020)
##
## `Promedio de años de escolaridad` -0.022*
## (0.011)
##
## `Political stability` 0.081* 0.053
## (0.041) (0.035)
##
## `Voice and accountability` -0.133*** 0.004
## (0.042) (0.039)
##
## `Rule of law` 0.025 -0.012
## (0.101) (0.083)
##
## `Control Corruption` -0.059 -0.046
## (0.077) (0.064)
##
## `Regulatory Quality` 0.143* 0.074
## (0.078) (0.066)
##
## `Tasa de desempleo` 0.001 0.001
## (0.002) (0.002)
##
## `Poblacion urbana` 0.003** 0.004** 0.002
## (0.001) (0.001) (0.001)
##
## `Campañas informativas`.L 0.009 0.020
## (0.049) (0.040)
##
## `Campañas informativas`.Q -0.056 -0.041
## (0.051) (0.042)
##
## Rigurosidad -0.001 -0.001
## (0.001) (0.001)
##
## GEE -0.120 -0.120
## (0.098) (0.078)
##
## `PBI per capita` 0.00001*** -0.00000
## (0.00000) (0.00000)
##
## `Densidad de la poblacion` -0.0001** 0.00001 -0.00004 -0.0001**
## (0.00002) (0.00003) (0.00003) (0.00003)
##
## `Ayuda economica`.L -0.016 -0.014
## (0.067) (0.055)
##
## IDH -0.262
## (0.326)
##
## `Expectativa de vida` 0.007
## (0.004)
##
## Constant 0.355*** -0.157* 0.013 -0.134
## (0.094) (0.084) (0.215) (0.277)
##
## -----------------------------------------------------------------------------------------------------------------------------------
## Observations 126 126 126 126
## R2 0.559 0.243 0.423 0.621
## Adjusted R2 0.545 0.211 0.345 0.557
## Residual Std. Error 0.189 (df = 121) 0.248 (df = 120) 0.226 (df = 110) 0.186 (df = 107)
## F Statistic 38.384*** (df = 4; 121) 7.688*** (df = 5; 120) 5.383*** (df = 15; 110) 9.732*** (df = 18; 107)
## ===================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
el_elegido = MINARISEM # the chosen model
stargazer(MINARISEM, type = "text")
##
## ================================================================
## Dependent variable:
## ---------------------------
## Contagiados
## ----------------------------------------------------------------
## Gobernanza -0.026**
## (0.011)
##
## `Renta Nacional` 0.00001***
## (0.00000)
##
## `Expectativa de años de escolaridad` -0.027***
## (0.009)
##
## `Densidad de la poblacion` -0.0001**
## (0.00002)
##
## Constant 0.355***
## (0.094)
##
## ----------------------------------------------------------------
## Observations 126
## R2 0.559
## Adjusted R2 0.545
## Residual Std. Error 0.189 (df = 121)
## F Statistic 38.384*** (df = 4; 121)
## ================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
A necessary aside to load more libraries
library(ggpubr) # plots to check normality
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(scatterplot3d)
library(stargazer)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
We start with linearity
plot(el_elegido, 1, main = c("Figure 2: Linearity"), sub = "MODEL 4") # roughly flat: approximately linear
We continue with homoscedasticity
plot(el_elegido, 3, main = c("Figure 3: Homoscedasticity")) # the spread should stay roughly flat
bptest(el_elegido) # a p-value above 0.05 would indicate homoscedasticity; here p << 0.05, so the errors are heteroskedastic
##
## studentized Breusch-Pagan test
##
## data: el_elegido
## BP = 52.403, df = 4, p-value = 1.136e-10
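Given the heteroskedasticity, one possible remedy (a sketch, assuming the sandwich package is installed) is to re-test the coefficients with heteroskedasticity-consistent standard errors; coeftest() comes from lmtest, already loaded above.
library(sandwich)
coeftest(el_elegido, vcov = vcovHC(el_elegido, type = "HC3")) # robust (HC3) standard errors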
Next, normality of residuals: the points should lie close to the diagonal.
plot(el_elegido, 2, main = c("Figure 4: Normality of residuals")) # they drift away from the diagonal
shapiro.test(el_elegido$residuals) # a p-value below 0.05 indicates the residuals are not normally distributed
##
## Shapiro-Wilk normality test
##
## data: el_elegido$residuals
## W = 0.81063, p-value = 1.884e-11
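The residuals are clearly non-normal. One common remedy (a sketch, not the specification chosen here; the beta regression in the appendix is another route) is to model the log of the strictly positive outcome and re-check:
modelo_log = lm(log(Contagiados) ~ Gobernanza + `Renta Nacional` + `Expectativa de años de escolaridad` + `Densidad de la poblacion`, data = data_regre)
shapiro.test(residuals(modelo_log)) # re-check normality on the log scale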
Last test: multicollinearity (VIF values below 5 are commonly considered acceptable)
VIF(el_elegido)
## data_regre$Gobernanza
## 2.870339
## data_regre$`Renta Nacional`
## 2.478354
## data_regre$`Expectativa de años de escolaridad`
## 2.287552
## data_regre$`Densidad de la poblacion`
## 1.128146
WE ANALYZE INFLUENTIAL VALUES
Pay attention to Cook's distance.
plot(el_elegido, 5, main = c("Figure 5: Identifying influential values"))
checkMINARISA=as.data.frame(influence.measures(el_elegido)$is.inf)
## Warning in abbreviate(vn): abbreviate used with non-ASCII chars
checkMINARISA[checkMINARISA$cook.d | checkMINARISA$hat,] #120, 124
## dfb.1_ dfb.d_$G dfb.d_$N dfb.ddade dfb.ddlp dffit cov.r cook.d hat
## Kuwait FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE
## Qatar TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## Singapore FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
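To gauge how sensitive the chosen model is to the flagged cases (a sketch; Kuwait, Qatar and Singapore are the rows marked above), we can refit without them using the bare column names:
sin_flag = data_regre[!rownames(data_regre) %in% c("Kuwait", "Qatar", "Singapore"), ]
refit = lm(Contagiados ~ Gobernanza + `Renta Nacional` + `Expectativa de años de escolaridad` + `Densidad de la poblacion`, data = sin_flag)
summary(refit) # compare the coefficients with el_elegido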
#APPENDICES
Rick = formula(Contagiados~ Experimento + Experimenton)
Morty = lm(Rick, data=data_regre)
summary(Morty)
##
## Call:
## lm(formula = Rick, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30618 -0.11700 -0.05005 0.00959 2.20319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01867 0.06037 -0.309 0.758
## Experimento 0.04074 0.00962 4.234 4.45e-05 ***
## Experimenton -0.02532 0.01783 -1.420 0.158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2613 on 123 degrees of freedom
## Multiple R-squared: 0.1399, Adjusted R-squared: 0.1259
## F-statistic: 10 on 2 and 123 DF, p-value: 9.427e-05
summary(data_regre$Contagiados)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0002622 0.0116779 0.0466983 0.1444046 0.1738542 2.4289680
Beta regression requires a response strictly inside (0, 1), so we rescale by 2.43, just above the observed maximum of ~2.4290.
data_regre$Contagiados=data_regre$Contagiados/2.43
library(betareg)
modelBeta=betareg(data_regre$Contagiados~data_regre$Gobernanza+data_regre$`Expectativa de años de escolaridad`+data_regre$`Renta Nacional`+data_regre$`Poblacion urbana`)
summary(modelBeta)
##
## Call:
## betareg(formula = data_regre$Contagiados ~ data_regre$Gobernanza + data_regre$`Expectativa de años de escolaridad` +
## data_regre$`Renta Nacional` + data_regre$`Poblacion urbana`)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -3.5263 -0.3165 0.0937 0.4748 7.4819
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value
## (Intercept) -2.963e+00 4.392e-01 -6.746
## data_regre$Gobernanza -1.011e-01 5.122e-02 -1.974
## data_regre$`Expectativa de años de escolaridad` -6.949e-02 4.229e-02 -1.643
## data_regre$`Renta Nacional` 4.926e-05 5.764e-06 8.547
## data_regre$`Poblacion urbana` 8.570e-03 5.377e-03 1.594
## Pr(>|z|)
## (Intercept) 1.52e-11 ***
## data_regre$Gobernanza 0.0484 *
## data_regre$`Expectativa de años de escolaridad` 0.1003
## data_regre$`Renta Nacional` < 2e-16 ***
## data_regre$`Poblacion urbana` 0.1110
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 12.982 1.927 6.738 1.6e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 279.9 on 6 Df
## Pseudo R-squared: 0.5347
## Number of iterations: 26 (BFGS) + 4 (Fisher scoring)
Under the EFA scheme
SEM = Data
str(SEM)
## 'data.frame': 126 obs. of 25 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality : num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Tasadesempleo : int 24 7 14 4 2 8 6 6 5 7 ...
## $ Densidadpob : num 56.9 24.7 104.6 163.8 135.6 ...
## $ Ayuda Económica : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ HDI : num 0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 78.5 81.8 77.8 ...
## $ EXPECTCOLE : num 10.1 11.8 15.2 13.3 13.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.05 10.16 10.95 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 12300 48641 66912 ...
## $ Poburbana : num 25.8 66.2 61.2 88 86.8 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
## $ PPP_2018 : num 524 3290 5284 41793 43839 ...
## $ Valor100 : num 0.043397 0.000814 0.058581 1.104457 0.166214 ...
## $ pobla : num 38041754 31825295 2854191 77142 9770529 ...
## $ experimento : num 1.19 1.95 4.7 8.59 7.35 ...
## $ experimenton : num 3.103 0.979 2.076 0.697 0.393 ...
## $ Gobernanza : num 0.34 2.18 5.19 8.64 6.84 ...
## $ Medidas_tempranas : num 2.717 3.006 6.984 0.934 1.092 ...
## $ estructural : num 1.34 1.87 5.27 7.14 7.65 ...
## - attr(*, "na.action")= 'omit' Named int [1:3] 39 107 112
## ..- attr(*, "names")= chr [1:3] "39" "116" "121"
names(SEM)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
## [21] "experimento" "experimenton"
## [23] "Gobernanza" "Medidas_tempranas"
## [25] "estructural"
SEM$Tasadesempleo = as.numeric(SEM$Tasadesempleo)
SEM$`Ayuda Económica`= as.numeric(SEM$`Ayuda Económica`)
SEM$infoalawk = as.numeric(SEM$infoalawk)
names(SEM)[8]="APOYOECO"
HappyDemoScaled=scale(SEM[,-c( 9,20:25)])
model <- '# describing the latent variables:
gobernanza =~ Political_sta + Voice_acco + Ruleoflaw + Control_co + Regulatory_quality + GEE
medidas =~ infoalawk + Rigurosidad + APOYOECO
estructural =~ EXPECTATIVAVIDA + EXPECTCOLE + YEARS_SCHOOLING + GNI_GROSSNATIONALINCOME
# regression on the latent variables:
Valor100~gobernanza + medidas + estructural + Densidadpob + Poburbana'
#did you install "lavaan"?
library(lavaan)
## This is lavaan 0.6-7
## lavaan is BETA software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
## The following object is masked from 'package:matrixcalc':
##
## vech
fit <- sem(model, data=HappyDemoScaled)
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
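The warning points to a Heywood case. A quick way to locate it (a sketch): inspect the estimated residual variances and look for a negative entry.
diag(lavInspect(fit, "est")$theta) # residual variances; a negative value marks the offending indicator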
Everything has been stored in fit. First we retrieve the estimated parameters:
# get the parameters
allParamSEM=parameterEstimates(fit,standardized = T)
allParamSEM[allParamSEM$op=="~",]
## lhs op rhs est se z pvalue ci.lower ci.upper std.lv
## 14 Valor100 ~ gobernanza 0.270 0.216 1.253 0.210 -0.152 0.693 0.219
## 15 Valor100 ~ medidas -0.111 0.193 -0.576 0.565 -0.490 0.267 -0.034
## 16 Valor100 ~ estructural -0.155 0.206 -0.750 0.453 -0.559 0.250 -0.135
## 17 Valor100 ~ Densidadpob 0.017 0.080 0.216 0.829 -0.139 0.173 0.017
## 18 Valor100 ~ Poburbana 0.410 0.080 5.155 0.000 0.254 0.566 0.410
## std.all std.nox
## 14 0.223 0.223
## 15 -0.035 -0.035
## 16 -0.137 -0.137
## 17 0.017 0.018
## 18 0.417 0.419
allFitSEM=as.list(fitMeasures(fit))
Test 1: Is the chi-square NOT significant? (we want it NOT to be)
allFitSEM[c("chisq", "pvalue")] # pvalue>0.05
## $chisq
## [1] 415.9929
##
## $pvalue
## [1] 0
Test 2: Is the Tucker-Lewis index above 0.9? (we want it to be higher).
allFitSEM$tli # > 0.90
## [1] 0.806613
Is the root mean square error of approximation below 0.05?
allFitSEM[c('rmsea.ci.lower','rmsea' ,'rmsea.ci.upper')]
## $rmsea.ci.lower
## [1] 0.1447474
##
## $rmsea
## [1] 0.1604761
##
## $rmsea.ci.upper
## [1] 0.1765382
Under the EFA scheme, this time as a confirmatory factor analysis (CFA)
str(SEM)
## 'data.frame': 126 obs. of 25 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality : num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Tasadesempleo : num 24 7 14 4 2 8 6 6 5 7 ...
## $ Densidadpob : num 56.9 24.7 104.6 163.8 135.6 ...
## $ APOYOECO : num 1 1 1 1 1 1 1 1 1 1 ...
## $ HDI : num 0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 78.5 81.8 77.8 ...
## $ EXPECTCOLE : num 10.1 11.8 15.2 13.3 13.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.05 10.16 10.95 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 12300 48641 66912 ...
## $ Poburbana : num 25.8 66.2 61.2 88 86.8 ...
## $ infoalawk : num 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
## $ PPP_2018 : num 524 3290 5284 41793 43839 ...
## $ Valor100 : num 0.043397 0.000814 0.058581 1.104457 0.166214 ...
## $ pobla : num 38041754 31825295 2854191 77142 9770529 ...
## $ experimento : num 1.19 1.95 4.7 8.59 7.35 ...
## $ experimenton : num 3.103 0.979 2.076 0.697 0.393 ...
## $ Gobernanza : num 0.34 2.18 5.19 8.64 6.84 ...
## $ Medidas_tempranas : num 2.717 3.006 6.984 0.934 1.092 ...
## $ estructural : num 1.34 1.87 5.27 7.14 7.65 ...
## - attr(*, "na.action")= 'omit' Named int [1:3] 39 107 112
## ..- attr(*, "names")= chr [1:3] "39" "116" "121"
names(SEM)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "APOYOECO"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
## [21] "experimento" "experimenton"
## [23] "Gobernanza" "Medidas_tempranas"
## [25] "estructural"
Scaled=scale(SEM[,-c( 9,20:25)])
modelito <- '# describing the latent variables:
gobernanza1 =~ Political_sta + Voice_acco + Ruleoflaw + Control_co + Regulatory_quality + GEE
medidas1 =~ infoalawk + Rigurosidad + APOYOECO
estructural1 =~ EXPECTATIVAVIDA + EXPECTCOLE + YEARS_SCHOOLING + GNI_GROSSNATIONALINCOME
demografia1 =~ Densidadpob + Poburbana'
#did you install "lavaan"?
library(lavaan)
cfa_fit = cfa(modelito, data=Scaled,
std.lv=TRUE,
missing="fiml")
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
TESTS
allParamCFA=parameterEstimates(cfa_fit,standardized = T)
allFitCFA=as.list(fitMeasures(cfa_fit))
library(knitr)
kable(allParamCFA[allParamCFA$op=="=~",])
| lhs | op | rhs | est | se | z | pvalue | ci.lower | ci.upper | std.lv | std.all | std.nox |
|---|---|---|---|---|---|---|---|---|---|---|---|
| gobernanza1 | =~ | Political_sta | 0.8093461 | 0.0729109 | 11.100486 | 0.0000000 | 0.6664434 | 0.9522487 | 0.8093461 | 0.8125772 | 0.8125772 |
| gobernanza1 | =~ | Voice_acco | 0.7615433 | 0.0749855 | 10.155877 | 0.0000000 | 0.6145745 | 0.9085121 | 0.7615433 | 0.7645835 | 0.7645835 |
| gobernanza1 | =~ | Ruleoflaw | 0.9802122 | 0.0638197 | 15.359093 | 0.0000000 | 0.8551280 | 1.1052965 | 0.9802122 | 0.9841254 | 0.9841254 |
| gobernanza1 | =~ | Control_co | 0.9510709 | 0.0655755 | 14.503440 | 0.0000000 | 0.8225452 | 1.0795966 | 0.9510709 | 0.9548677 | 0.9548677 |
| gobernanza1 | =~ | Regulatory_quality | 0.9442297 | 0.0659972 | 14.307116 | 0.0000000 | 0.8148776 | 1.0735819 | 0.9442297 | 0.9479992 | 0.9479992 |
| gobernanza1 | =~ | GEE | 0.9667811 | 0.0646554 | 14.952828 | 0.0000000 | 0.8400588 | 1.0935033 | 0.9667811 | 0.9706405 | 0.9706405 |
| medidas1 | =~ | infoalawk | 0.2976537 | 0.1237921 | 2.404466 | 0.0161961 | 0.0550258 | 0.5402817 | 0.2976537 | 0.2988418 | 0.2988418 |
| medidas1 | =~ | Rigurosidad | 1.3466748 | 0.4140978 | 3.252070 | 0.0011457 | 0.5350581 | 2.1582915 | 1.3466748 | 1.3520505 | 1.3520505 |
| medidas1 | =~ | APOYOECO | 0.1827824 | 0.0910295 | 2.007948 | 0.0446489 | 0.0043679 | 0.3611968 | 0.1827824 | 0.1835120 | 0.1835120 |
| estructural1 | =~ | EXPECTATIVAVIDA | 0.8694604 | 0.0710074 | 12.244650 | 0.0000000 | 0.7302885 | 1.0086323 | 0.8694604 | 0.8729311 | 0.8729311 |
| estructural1 | =~ | EXPECTCOLE | 0.8881422 | 0.0701978 | 12.652002 | 0.0000000 | 0.7505571 | 1.0257273 | 0.8881422 | 0.8916876 | 0.8916876 |
| estructural1 | =~ | YEARS_SCHOOLING | 0.8601926 | 0.0717748 | 11.984609 | 0.0000000 | 0.7195166 | 1.0008686 | 0.8601926 | 0.8636263 | 0.8636263 |
| estructural1 | =~ | GNI_GROSSNATIONALINCOME | 0.8004890 | 0.0752024 | 10.644465 | 0.0000000 | 0.6530950 | 0.9478829 | 0.8004890 | 0.8036845 | 0.8036845 |
| demografia1 | =~ | Densidadpob | 0.1711704 | 0.0965991 | 1.771966 | 0.0764003 | -0.0181605 | 0.3605012 | 0.1711704 | 0.1718537 | 0.1718537 |
| demografia1 | =~ | Poburbana | 0.8640586 | 0.2121843 | 4.072208 | 0.0000466 | 0.4481850 | 1.2799321 | 0.8640586 | 0.8675079 | 0.8675079 |
CHI2: Is the chi-square NOT significant? (the p-value must be above 0.05 for a good fit)
allFitCFA[c("chisq", "df", "pvalue")] # pvalue>0.05
## $chisq
## [1] 236.8741
##
## $df
## [1] 84
##
## $pvalue
## [1] 2.220446e-16
Is the Tucker-Lewis index above 0.9?
allFitCFA$tli # > 0.90
## [1] 0.8990895
Is the root mean square error of approximation below 0.05?
allFitCFA[c('rmsea.ci.lower','rmsea' ,'rmsea.ci.upper')] # does the confidence interval contain 0.05?
## $rmsea.ci.lower
## [1] 0.1022819
##
## $rmsea
## [1] 0.1201827
##
## $rmsea.ci.upper
## [1] 0.1383808
We add the indices to the indicator data:
SEM=as.data.frame(cbind(SEM,lavPredict(cfa_fit)))
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
MODEL AS AN ORDINARY REGRESSION
#Governance, early measures, structural, and demographics (urban population and population density)
sueño = formula(Valor100~ gobernanza1 + medidas1 + estructural1 + demografia1 )
dormir = lm(sueño, data = SEM)
summary(dormir)
##
## Call:
## lm(formula = sueño, data = SEM)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33297 -0.11455 -0.03483 0.05961 2.08329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.14440 0.02238 6.453 2.38e-09 ***
## gobernanza1 0.09982 0.06732 1.483 0.14073
## medidas1 -0.01109 0.01631 -0.680 0.49775
## estructural1 -0.18256 0.12493 -1.461 0.14653
## demografia1 0.23452 0.08328 2.816 0.00568 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2512 on 121 degrees of freedom
## Multiple R-squared: 0.2179, Adjusted R-squared: 0.1921
## F-statistic: 8.43 on 4 and 121 DF, p-value: 4.931e-06