library(rio)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# URL of the global time series of confirmed COVID-19 cases (wide layout:
# every column other than the identifier columns is one date).
data_covid <- "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

# Reshape to long format: each date column becomes a (Fecha, Valor) row.
# pivot_longer() replaces the superseded gather(); the intermediate row order
# differs, but the table is re-aggregated by country and date further on.
# Only observations with at least one case are kept.
WorldData <- import(file = data_covid) %>%
  dplyr::mutate(type = "datacon") %>%
  tidyr::pivot_longer(
    cols = -c(type, "Province/State", "Country/Region", Lat, Long),
    names_to = "Fecha",
    values_to = "Valor"
  ) %>%
  dplyr::filter(Valor > 0) %>%
  as.data.frame()

# Parse the month/day/year column labels; mdy() already returns a Date, so no
# further as.Date() conversion is needed.
WorldData$Fecha <- mdy(WorldData$Fecha)
Ponemos un nombre
# Rename the country column by name instead of by position, so the step does
# not silently rename the wrong column if the column order ever changes.
names(WorldData)[names(WorldData) == "Country/Region"] <- "Country"

# Collapse provinces into countries: one summed value per country and date.
WorldData <- aggregate(Valor ~ Country + Fecha, data = WorldData, FUN = sum)

# Rename Egypt (presumably to match the spelling of the lookup table that is
# merged by country name later).
WorldData$Country <- gsub('Egypt', "Egypt, Arab Rep.", WorldData$Country)
Un paréntesis necesario para tener el código de cada país. Ahora necesitamos agregar el código a cada país y quedarnos con eso.
# Country name -> country code lookup: first two columns of the file at
# `link1`, renamed, with repeated rows removed.
link1 <- "https://github.com/CarlosGDiez/BasesLimpias/raw/master/Gee_sucio.csv"
oto <- import(link1)
oto <- setNames(oto[, c(1, 2)], c("Country", "CODE"))
oto <- unique(oto)
Calculamos el día 100
# Day 100: for every country, the 100th date present in WorldData and its
# value. Countries with fewer than 100 dates yield no row. The rows are sorted
# by date within each country so the pick does not rely on incoming order.
Dia100 <- WorldData %>%
  dplyr::group_by(Country) %>%
  dplyr::arrange(Fecha, .by_group = TRUE) %>%
  dplyr::filter(dplyr::row_number() == 100) %>%
  dplyr::ungroup()

# Merge with `oto` to attach the country code.
Dia100 <- merge(oto, Dia100, by = "Country")

# Keep only code, date and value (selected by name), then give the date and
# the value their day-100 names.
Dia100 <- Dia100[, c("CODE", "Fecha", "Valor")]
names(Dia100) <- c("CODE", "Fecha100", "Valor100")

# Merge key "<CODE> <date>" for later use. The column is referenced by its
# full name: the previous `Dia100$Fecha` only worked through `$` partial
# matching of "Fecha100".
Dia100$DIA100 <- paste(Dia100$CODE, Dia100$Fecha100)
Calculamos el día 7
# Day 7: for every country, the 7th date present in WorldData and its value.
# Countries with fewer than 7 dates yield no row. The rows are sorted by date
# within each country so the pick does not rely on incoming order.
Dia7 <- WorldData %>%
  dplyr::group_by(Country) %>%
  dplyr::arrange(Fecha, .by_group = TRUE) %>%
  dplyr::filter(dplyr::row_number() == 7) %>%
  dplyr::ungroup()

# Merge with `oto` to attach the country code.
Dia7 <- merge(oto, Dia7, by = "Country")

# Keep only code, date and value (selected by name), then give the date and
# the value their day-7 names.
Dia7 <- Dia7[, c("CODE", "Fecha", "Valor")]
names(Dia7) <- c("CODE", "Fecha7", "Valor7")

# Merge key "<CODE> <date>" used to pick day-7 rows from other tables. The
# column is referenced by its full name: the previous `Dia7$Fecha` only worked
# through `$` partial matching of "Fecha7".
Dia7$DIA7 <- paste(Dia7$CODE, Dia7$Fecha7)
Ahora podemos tocar World data por separado
# Attach the country code to every WorldData row, then discard the country
# name so that only the code identifies the country.
WorldData <- merge(oto, WorldData, by = "Country")
WorldData[["Country"]] <- NULL
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse
## The following object is masked from 'package:base':
##
## isFALSE
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(cluster)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
library(dbscan)
library(descr)
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
## The following object is masked from 'package:car':
##
## Recode
## The following object is masked from 'package:BBmisc':
##
## %nin%
library(foreign)
library(fpc)
##
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
##
## dbscan
library(ggcorrplot)
## Loading required package: ggplot2
library(GPArotation)
library(haven)
library(htmltab)
library(jsonlite)
library(matrixcalc)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:rio':
##
## export
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(PMCMRplus)
library(polycor)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following objects are masked from 'package:DescTools':
##
## AUC, ICC, SD
## The following object is masked from 'package:car':
##
## logit
library(readr)
library(readxl)
library(stringi)
library(stringr)
library(tidyr)
library(tidyverse)
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'
## Also defined by 'Rmpfr'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.4 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x lubridate::as.difftime() masks base::as.difftime()
## x data.table::between() masks dplyr::between()
## x BBmisc::coalesce() masks dplyr::coalesce()
## x BBmisc::collapse() masks dplyr::collapse()
## x lubridate::date() masks base::date()
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x data.table::first() masks dplyr::first()
## x purrr::flatten() masks jsonlite::flatten()
## x data.table::hour() masks lubridate::hour()
## x lubridate::intersect() masks base::intersect()
## x data.table::isoweek() masks lubridate::isoweek()
## x dplyr::lag() masks stats::lag()
## x data.table::last() masks dplyr::last()
## x data.table::mday() masks lubridate::mday()
## x data.table::minute() masks lubridate::minute()
## x data.table::month() masks lubridate::month()
## x data.table::quarter() masks lubridate::quarter()
## x car::recode() masks dplyr::recode()
## x data.table::second() masks lubridate::second()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::some() masks car::some()
## x purrr::transpose() masks data.table::transpose()
## x lubridate::union() masks base::union()
## x data.table::wday() masks lubridate::wday()
## x data.table::week() masks lubridate::week()
## x data.table::yday() masks lubridate::yday()
## x data.table::year() masks lubridate::year()
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
#Población Traemos la data de población en cada país
# Population table for each country.
linkedin <- "https://github.com/AriannaNKZC/Estad-2/raw/master/%C2%BFSera%20la%20data%3F.xls"
poblacion <- import(linkedin)

# Keep columns 1, 2 and 64 and give them working names.
poblacion <- setNames(poblacion[, c(1, 2, 64)], c("Country", "CODE", "pobla"))

# Combine the case data with the population data by country code.
WorldData <- merge(poblacion, WorldData, by = "CODE")
AHORA TRABAJAMOS LAS VARIABLES INDEPENDIENTES
Un paréntesis necesario para tener el código de cada país en español. Ahora necesitamos agregar el código a cada país y quedarnos con eso. Traemos la base de datos.
# Spanish country name -> country code lookup.
CODESPAÑOL <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
CDSP <- import(CODESPAÑOL)

# The first imported row holds the header text, so it is dropped; only the
# first two columns are kept and renamed.
CDSP <- CDSP[-1, c(1, 2)]
names(CDSP) <- c("PAIS", "CODE")
Traemos la data
# First variable: per-capita income indicator.
# NOTE(review): the indicator code in the URL is NY.GDP.PCAP.CD, which looks
# like GDP per capita in current US$ rather than PPP — confirm that the
# "PPP_2018" name is intended.
data_ppp <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_NY.GDP.PCAP.CD_DS2_es_csv_v2_1347337.csv"
ppp_pib <- import(data_ppp)

# The first imported row holds the header text, so it is dropped; columns 2
# and 63 are kept and renamed.
ppp_pib <- ppp_pib[-1, c(2, 63)]
names(ppp_pib) <- c("CODE", "PPP_2018")
#Segunda variable: Government Effectiveness Estimate (Índice de la Efectidad de la Gobernanza) Traemos la data (LA MISMA QUE SE USÓ PARA CREAR A OTO)
# Load the governance file (the same one used to build `oto`) and name its
# five columns.
GEE <- import(link1)
names(GEE) <- c("Country", "CODE", "Series", "SC", "GEE")

# For each country keep only the rows of its first-listed series (the GEE
# value rather than its standard error). The dplyr:: prefixes are required:
# plyr is attached after dplyr (via Rmisc) and its mutate() ignores
# group_by(), so the unqualified verbs did not work per country.
GEE <- GEE %>%
  dplyr::group_by(Country) %>%
  dplyr::filter(Series == dplyr::first(Series)) %>%
  dplyr::ungroup()

# Remove the empty rows by condition instead of by hard-coded row numbers
# (215:219), which were out of range and triggered a tibble warning.
# Assumes the empty rows are the ones without a country code — TODO confirm.
GEE <- GEE[!is.na(GEE$CODE) & GEE$CODE != "", ]

# Keep only the columns needed later, selected by name.
GEE <- as.data.frame(GEE[, c("CODE", "GEE")])
#Tercera variable: Índice de rigurosidad al séptimo día de contagio Traemos las data
# Third variable: stringency index on each country's day 7.
link2 <- "https://github.com/CarlosGDiez/BasesLimpias/blob/master/Rigurosidad.csv?raw=true"
Rigurosidad <- import(link2)

# Keep columns 1, 2, 5 and 35 and name them.
Rigurosidad <- setNames(
  Rigurosidad[, c(1, 2, 5, 35)],
  c("Country", "CODE", "Date", "Rigurosidad")
)

# Parse the year-month-day dates and build the "<CODE> <date>" merge key.
Rigurosidad$Date <- ymd(Rigurosidad$Date)
Rigurosidad$DIA7 <- paste(Rigurosidad$CODE, Rigurosidad$Date)

# Keep only the rows that match a country's day 7, then reduce the merged
# table to its 3rd and 5th columns (country code and index).
Rigurosidad <- merge(Rigurosidad, Dia7, by = "DIA7")
Rigurosidad <- setNames(Rigurosidad[, c(3, 5)], c("CODE", "Rigurosidad"))
#Cuarta variable: Campañas informativas al séptimo día de contagio Traemos las data
# Fourth variable: public information campaigns on each country's day 7.
infocamp <- "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/public-campaigns-covid.csv"
dataic <- import(infocamp)

# Parse the year-month-day dates and build the "<Code> <date>" merge key.
dataic$Date <- ymd(dataic$Date)
dataic$DIA7 <- paste(dataic$Code, dataic$Date)

# Keep only the rows that match a country's day 7, then retain columns 5
# and 6 of the merged table.
dataic <- merge(dataic, Dia7, by = "DIA7")[, c(5, 6)]
#Quinta varaible: Población Urbana Traemos la data
# Fifth variable: urban population.
xurb <- "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/API_SP.URB.TOTL.IN.ZS_DS2_es_csv_v2_1347951.csv"
dataxurb <- import(xurb)

# The first imported row holds the header text, so it is dropped; columns 2
# and 64 are kept and renamed.
dataxurb <- dataxurb[-1, c(2, 64)]
names(dataxurb) <- c("CODE", "Poburbana")

# Renumber the rows from 1. This replaces a helper column filled with the
# hard-coded sequence 1:264, which fails as soon as the file has a different
# number of rows.
rownames(dataxurb) <- NULL

# Round to two decimals.
dataxurb$Poburbana <- round(dataxurb$Poburbana, digits = 2)
#Sexta variable:Índice de Desarrollo Humano (Human Development Index), indicador que integra las variables PBI, Educación y Esperanza de vida Traemos la data
# Sixth variable: Human Development Index and its components.
LIDH <- "https://github.com/CarlaMendozaE/Prueba/blob/master/IDH.xlsx?raw=true"
IDH <- import(LIDH)

# Discard columns 1, 8 and 9 and name the six that remain.
IDH[, c(1, 8, 9)] <- NULL
names(IDH) <- c("Country", "HDI", "EXPECTATIVAVIDA", "EXPECTCOLE", "YEARS_SCHOOLING", "GNI_GROSSNATIONALINCOME")
IDH$Country <- gsub('Egypt', "Egypt, Arab Rep.", IDH$Country)

# Convert the five indicator columns to numeric and round them to two
# decimals. Non-numeric cells become NA, and as.numeric() warns
# "NAs introduced by coercion" for each affected column.
IDH[, 2:6] <- lapply(
  IDH[, 2:6],
  function(columna) round(as.numeric(columna), digits = 2)
)

# Attach the country code.
IDH <- merge(oto, IDH, by = "Country")
#Séptima variable: Ayuda económica Traemos la data
# Seventh variable: economic support on each country's day 7.
linkayuda <- "https://raw.githubusercontent.com/CarlosGDiez/BasesLimpias/master/Rigurosidad.csv"
dataayuda <- import(linkayuda)

# Keep columns 2, 5 and 21.
dataayuda <- dataayuda[, c(2, 5, 21)]

# Remove the row ranges labelled UK (16741:17820) and USA (48601:62640) in
# the original script. Both ranges refer to positions in the freshly imported
# table, so removing them in one step is equivalent.
# NOTE(review): positions are hard-coded and tied to this exact file version.
dataayuda <- dataayuda[-c(16741:17820, 48601:62640), ]

names(dataayuda) <- c("CODE", "Date", "Ayuda Económica")

# Parse the year-month-day dates and build the "<CODE> <date>" merge key.
dataayuda$Date <- ymd(dataayuda$Date)
dataayuda$DIA7 <- paste(dataayuda$CODE, dataayuda$Date)

# Keep only the rows that match a country's day 7, then retain columns 2
# and 4 of the merged table and restore the plain "CODE" name.
dataayuda <- merge(dataayuda, Dia7, by = "DIA7")[, c(2, 4)]
names(dataayuda)[1] <- "CODE"
#Octava variable: Densidad de la población Traemos la data
# Eighth variable: population density.
# The import reports "New names" because several header cells are empty.
linkdensidad <- "https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/Excel%20densidad.xlsx.xls"
datadensidad <- import(linkdensidad)

# The first three rows precede the data (row 3 holds the header text), so
# they are dropped; columns 2 and 63 are kept and renamed.
datadensidad <- datadensidad[-(1:3), c(2, 63)]
names(datadensidad) <- c("CODE", "Densidadpob")

# Convert to numeric and round to two decimals.
datadensidad$Densidadpob <- round(as.numeric(datadensidad$Densidadpob), digits = 2)
#Novena variable: Tasa de desempleo Traemos la data
# Ninth variable: unemployment rate.
datadesempleo <- import("https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/datadesempleooriginal.csv")
names(datadesempleo) <- c("PAIS", "Tasadesempleo")

# Spelling corrections (pattern = replacement), applied one after another in
# this order so that the country names match the ones in `CDSP`.
correcciones <- c(
  "Egipto" = "Egipto, República Árabe de",
  "Benín" = "Benin",
  "Bahráin" = "Bahrein",
  "Bosnia y Hercegovina" = "Bosnia y Herzegovina",
  "Bután" = "Bhután",
  "Botsuana" = "Botswana",
  "Kazajistán" = "Kazajstán",
  "Kenia" = "Kenya",
  "Lesoto" = "Lesotho",
  "Malaui" = "Malawi",
  "Nueva Zelanda" = "Nueva Zelandia",
  "Ruanda" = "Rwanda",
  "Arabia Saudí" = "Arabia Saudita",
  "Surinam" = "Suriname",
  "Zimbabue" = "Zimbabwe"
)
for (patron in names(correcciones)) {
  datadesempleo$PAIS <- gsub(patron, correcciones[[patron]], datadesempleo$PAIS)
}

# Attach the country code through the Spanish country name.
datadesempleo <- merge(CDSP, datadesempleo, by = "PAIS")
#Décima variable: Regulatory quality Traemos la data
# Tenth variable: regulatory quality.
perro <- "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/258c45e7-1b68-4b8e-853d-a2554f1bb145_Data.csv"
regulatory <- import(perro)

# Keep columns 2 and 5 and name them.
regulatory <- setNames(regulatory[, c(2, 5)], c("CODE", "Regulatory_quality"))

# Convert to numeric (non-numeric cells become NA, with a coercion warning)
# and round to two decimals.
regulatory$Regulatory_quality <- round(as.numeric(regulatory$Regulatory_quality), digits = 2)
#Undécima variable: Control de la corrupción Traemos la data
# Eleventh variable: control of corruption.
gato <- "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/51253f2e-7374-408f-8685-c729a64d043a_Data.csv"
control_co <- import(gato)

# Keep columns 2 and 5 and name them.
control_co <- setNames(control_co[, c(2, 5)], c("CODE", "Control_co"))

# Convert to numeric (non-numeric cells become NA, with a coercion warning)
# and round to two decimals.
control_co$Control_co <- round(as.numeric(control_co$Control_co), digits = 2)
#Duodécima variable: Rule of law Traemos la data
# Twelfth variable: rule of law.
AXA <- "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/a9249c7d-95ab-4618-9160-3a247dea2bae_Data.csv"
ruleof <- import(AXA)

# Keep columns 2 and 5 and name them.
ruleof <- setNames(ruleof[, c(2, 5)], c("CODE", "Ruleoflaw"))

# Convert to numeric (non-numeric cells become NA, with a coercion warning)
# and round to two decimals.
ruleof$Ruleoflaw <- round(as.numeric(ruleof$Ruleoflaw), digits = 2)
#Décimotercera variable: Voice and accountability Traemos la data
# Thirteenth variable: voice and accountability.
VA <- 'https://github.com/AriannaNKZC/Estad-2/raw/master/Voice_and_accountability.csv'
VocA <- import(VA)

# Keep columns 2 and 5 and name them.
VocA <- setNames(VocA[, c(2, 5)], c("CODE", "Voice_acco"))

# Convert to numeric (non-numeric cells become NA, with a coercion warning)
# and round to two decimals.
VocA$Voice_acco <- round(as.numeric(VocA$Voice_acco), digits = 2)
#Décimocuarta variable: Political stability Traemos la data
# Fourteenth variable: political stability.
PS <- 'https://github.com/AriannaNKZC/Estad-2/raw/master/e0757e7a-8829-44d2-a7a3-11a580c19a53_Data.csv'
PolS <- import(PS)

# Keep columns 2 and 5 and name them.
PolS <- setNames(PolS[, c(2, 5)], c("CODE", "Political_sta"))

# Convert to numeric (non-numeric cells become NA, with a coercion warning)
# and round to two decimals.
PolS$Political_sta <- round(as.numeric(PolS$Political_sta), digits = 2)
MERGEAMOS TODAS LAS VARIABLES EN UN SOLO DATAFRAME
# Merge every variable table into one data frame, joining on the country
# code. The tables are folded left to right in the listed order, so each
# merge keeps only the codes present in everything merged so far.
tablas <- list(
  PolS, VocA, ruleof, control_co, regulatory, datadesempleo, datadensidad,
  dataayuda, IDH, dataxurb, dataic, Rigurosidad, GEE, ppp_pib, Dia100,
  poblacion
)
Data <- Reduce(
  function(acumulado, tabla) merge(acumulado, tabla, by = "CODE"),
  tablas
)
Limpiamos
#Eliminamos columnas
names(Data)
## [1] "CODE"
## [2] "Political_sta"
## [3] "Voice_acco"
## [4] "Ruleoflaw"
## [5] "Control_co"
## [6] "Regulatory_quality"
## [7] "PAIS"
## [8] "Tasadesempleo"
## [9] "Densidadpob"
## [10] "Ayuda Económica"
## [11] "Country.x"
## [12] "HDI"
## [13] "EXPECTATIVAVIDA"
## [14] "EXPECTCOLE"
## [15] "YEARS_SCHOOLING"
## [16] "GNI_GROSSNATIONALINCOME"
## [17] "Poburbana"
## [18] "Public information campaigns (OxBSG)"
## [19] "Rigurosidad"
## [20] "GEE"
## [21] "PPP_2018"
## [22] "Fecha100"
## [23] "Valor100"
## [24] "DIA100"
## [25] "Country.y"
## [26] "pobla"
# Drop the columns that are no longer needed. They are removed by name rather
# than by position (7, 24, 25), so the step keeps working if the merge order
# or an upstream table changes.
Data <- Data[, !(names(Data) %in% c("PAIS", "Country.y", "DIA100"))]

# Remove repeated rows.
Data <- Data[!duplicated(Data), ]

# Give the remaining country column and the information-campaign column
# their working names, again matching on the current name.
names(Data)[names(Data) == "Country.x"] <- "Country"
names(Data)[names(Data) == "Public information campaigns (OxBSG)"] <- "infoalawk"

# GEE arrives as text: convert it to numeric (non-numeric cells become NA,
# with a coercion warning) and round both GEE and PPP_2018 to two decimals.
Data$GEE <- round(as.numeric(Data$GEE), digits = 2)
Data$PPP_2018 <- round(Data$PPP_2018, digits = 2)
Arreglamos ordinales
# Economic support: turn the column into an ordered factor and label its
# levels. The labels are assigned, in order, to the sorted distinct values
# that are present in the data.
# NOTE(review): the relabelling relies on exactly the expected categories
# being present; if one were missing, the labels would be attached to the
# wrong values — confirm against the source coding.
Data$`Ayuda Económica`= as.ordered(Data$`Ayuda Económica`)
levels(Data$`Ayuda Económica`) = c("Sin apoyo", "Menos del 50% del sueldo")
table(Data$`Ayuda Económica`)
##
## Sin apoyo Menos del 50% del sueldo
## 121 8
# Information campaigns: same treatment, with three labelled levels.
# NOTE(review): same caveat as above about missing categories.
Data$infoalawk = as.ordered(Data$infoalawk)
levels(Data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
table(Data$infoalawk)
##
## Ninguna Campañas del gobierno Campañas integrales
## 17 19 93
Eliminamos na’s
Data=na.omit(Data)
ANÁLISIS BIVARIADO ##########################
names(Data)
## [1] "CODE" "Political_sta"
## [3] "Voice_acco" "Ruleoflaw"
## [5] "Control_co" "Regulatory_quality"
## [7] "Tasadesempleo" "Densidadpob"
## [9] "Ayuda Económica" "Country"
## [11] "HDI" "EXPECTATIVAVIDA"
## [13] "EXPECTCOLE" "YEARS_SCHOOLING"
## [15] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [17] "infoalawk" "Rigurosidad"
## [19] "GEE" "PPP_2018"
## [21] "Fecha100" "Valor100"
## [23] "pobla"
# Express the day-100 value as a percentage of the population.
Data$Valor100 <- (Data$Valor100 / Data$pobla) * 100

# Use the country name as row label, then drop the identifier and date
# columns that are not part of the analysis.
rownames(Data) <- Data$Country
Data[c("Country", "CODE", "Fecha100")] <- NULL
#abr
Calculemos matriz de correlación:
names(Data)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
# Keep rows 1-106 and 108-126, i.e. drop row 107.
# NOTE(review): the row is removed by position and the reason is not stated —
# confirm which case this is meant to exclude.
Data <- Data[c(1:106, 108:126), ]

# Variables of the first factor model, selected by name instead of by
# position (1:5, 8, 15:17) so the selection survives column reordering. The
# earlier `theData = Data` assignment was immediately overwritten and is
# dropped.
theData <- Data[, c(
  "Political_sta", "Voice_acco", "Ruleoflaw", "Control_co",
  "Regulatory_quality", "Ayuda Económica", "infoalawk", "Rigurosidad", "GEE"
)]
table(theData$`Ayuda Económica`)
##
## Sin apoyo Menos del 50% del sueldo
## 118 7
#theData$Voice_acco = NULL
str(theData)
## 'data.frame': 125 obs. of 9 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality: num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Ayuda Económica : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
# Heterogeneous correlation matrix of the model variables (theData mixes
# numeric columns and ordered factors).
hetero <- polycor::hetcor(theData)
lapiz <- hetero$correlations

# Explore the correlations.
ggcorrplot(lapiz)

# Evaluate significance. The p-values must come from the data: the previous
# cor_pmat(lapiz) ran correlation tests on the correlation matrix itself, as
# if its 9 rows were observations. Because theData contains ordered factors,
# the p-values are derived from hetcor's own standard errors with a two-sided
# z-test of each correlation against zero.
p_lapiz <- 2 * stats::pnorm(-abs(lapiz / hetero$std.errors))
diag(p_lapiz) <- 0
ggcorrplot(lapiz,
  p.mat = p_lapiz,
  insig = "blank",
  title = "Gráfico 1: Matriz de correlación")
psych::KMO(lapiz)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = lapiz)
## Overall MSA = 0.82
## MSA for each item =
## Political_sta Voice_acco Ruleoflaw Control_co
## 0.89 0.89 0.84 0.83
## Regulatory_quality Ayuda Económica infoalawk Rigurosidad
## 0.81 0.33 0.55 0.74
## GEE
## 0.86
cortest.bartlett(lapiz,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(lapiz)
## [1] FALSE
theData$`Ayuda Económica` = as.numeric(theData$`Ayuda Económica`)
theData$infoalawk = as.numeric(theData$infoalawk)
fa.parallel(theData, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
mandarina <- fa(theData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## mixed.cor is deprecated, please use mixedCor.
print(mandarina$loadings)
##
## Loadings:
## MR1 MR2
## Political_sta 0.835
## Voice_acco 0.814
## Ruleoflaw 0.962 -0.213
## Control_co 0.945 -0.130
## Regulatory_quality 0.925 -0.176
## Ayuda Económica 0.482
## infoalawk 0.484
## Rigurosidad -0.247 0.976
## GEE 0.931 -0.254
##
## MR1 MR2
## SS loadings 4.969 1.578
## Proportion Var 0.552 0.175
## Cumulative Var 0.552 0.727
fa.diagram(mandarina, main = c("Gráfico 2: Árbol de factorización del primer modelo"))
Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?
mandarina$crms
## [1] 0.03800651
¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?
mandarina$RMSEA
## RMSEA lower upper confidence
## 0.1715892 0.1373051 0.2093756 0.9000000
¿El índice de Tucker-Lewis es mayor a 0.9?
mandarina$TLI
## [1] 0.8893754
¿Qué variables aportaron más a los factores?
sort(mandarina$communality)
## Ayuda Económica infoalawk Voice_acco Political_sta
## 0.2320900 0.2409486 0.6618985 0.6985701
## Regulatory_quality Control_co GEE Ruleoflaw
## 0.8869448 0.9106537 0.9306910 0.9711806
## Rigurosidad
## 1.0142999
¿Qué variables contribuyen a más de un factor? #conviene que salga 1
sort(mandarina$complexity)
## Voice_acco Ayuda Económica Political_sta Control_co
## 1.000006 1.000738 1.002742 1.037732
## infoalawk Regulatory_quality Ruleoflaw Rigurosidad
## 1.056320 1.071947 1.097528 1.127458
## GEE
## 1.147903
factorial_casos<-as.data.frame(mandarina$scores) #en esta no me sale el factorial
head(factorial_casos)
summary(factorial_casos)
## MR1 MR2
## Min. :-2.0138 Min. :-1.7851
## 1st Qu.:-0.6831 1st Qu.:-0.7623
## Median :-0.2090 Median :-0.2188
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7056 3rd Qu.: 0.6276
## Max. : 1.9846 Max. : 3.2463
#factor estructural
Calculemos matriz de correlación:
demo = Data
names(demo)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
# Variables of the structural factor, selected by name instead of by position
# (10:13, 18, 6) so the selection survives column reordering.
demo <- Data[, c(
  "EXPECTATIVAVIDA", "EXPECTCOLE", "YEARS_SCHOOLING",
  "GNI_GROSSNATIONALINCOME", "PPP_2018", "Tasadesempleo"
)]
str(demo)
## 'data.frame': 125 obs. of 6 variables:
## $ EXPECTATIVAVIDA : num 64.5 60.8 78.5 81.8 77.8 ...
## $ EXPECTCOLE : num 10.1 11.8 15.2 13.3 13.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.05 10.16 10.95 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 12300 48641 66912 ...
## $ PPP_2018 : num 524 3290 5284 41793 43839 ...
## $ Tasadesempleo : int 24 7 14 4 2 8 6 6 5 7 ...
# Replace the unemployment rate with its complement (100 - rate).
demo$empleo <- 100 - (demo$Tasadesempleo)
head(demo)
demo$Tasadesempleo <- NULL

# Correlation matrix of the structural variables.
pinguino <- polycor::hetcor(demo)$correlations

# Explore the correlations.
ggcorrplot(pinguino)

# Evaluate significance. cor_pmat() expects the data, not a correlation
# matrix: the previous cor_pmat(pinguino) tested the correlation matrix as if
# its rows were observations. All columns of `demo` are numeric, so the
# p-values can be computed directly from it.
ggcorrplot(pinguino,
  p.mat = cor_pmat(demo),
  insig = "blank",
  title = "Gráfico 1: Matriz de correlación")
psych::KMO(pinguino)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = pinguino)
## Overall MSA = 0.83
## MSA for each item =
## EXPECTATIVAVIDA EXPECTCOLE YEARS_SCHOOLING
## 0.89 0.84 0.87
## GNI_GROSSNATIONALINCOME PPP_2018 empleo
## 0.77 0.77 0.93
cortest.bartlett(pinguino,n=nrow(demo))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(pinguino)
## [1] FALSE
# Parallel analysis on `demo` to suggest how many factors to extract.
fa.parallel(demo, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
# Fit the factor model that is used from here on: one factor, minres
# extraction, correlations computed with cor = 'mixed'.
# NOTE(review): the parallel analysis above was run with fm = 'ML' and its
# output suggests 2 factors, while this fit extracts 1 factor with
# fm = "minres" -- confirm both differences are intentional.
# NOTE(review): rotate = "varimax" presumably has no effect with a single
# factor -- confirm.
alfalfa <- fa(demo,nfactors = 1,cor = 'mixed',rotate ="varimax",fm="minres")
## mixed.cor is deprecated, please use mixedCor.
# Print the loadings, suppressing those below the 0.5 cutoff.
print(alfalfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1
## EXPECTATIVAVIDA 0.860
## EXPECTCOLE 0.856
## YEARS_SCHOOLING 0.863
## GNI_GROSSNATIONALINCOME 0.842
## PPP_2018 0.822
## empleo
##
## MR1
## SS loadings 3.724
## Proportion Var 0.621
fa.diagram(alfalfa, main = c("Gráfico 2: Árbol de factorización del primer modelo"))
Evaluando el resultado obtenido: ¿La raíz del error cuadrático medio corregida está cerca de cero?
alfalfa$crms
## [1] 0.09278042
¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?
alfalfa$RMSEA
## RMSEA lower upper confidence
## 0.2876571 0.2400340 0.3406230 0.9000000
¿El índice de Tucker-Lewis es mayor a 0.9?
alfalfa$TLI
## [1] 0.719773
¿Qué variables aportaron más a los factores?
sort(alfalfa$communality)
## empleo PPP_2018 GNI_GROSSNATIONALINCOME
## 0.1212575 0.6749885 0.7091592
## EXPECTCOLE EXPECTATIVAVIDA YEARS_SCHOOLING
## 0.7333696 0.7403700 0.7451315
¿Qué variables contribuyen a más de un factor? (Conviene que la complejidad salga cercana a 1.)
sort(alfalfa$complexity)
## PPP_2018 EXPECTATIVAVIDA EXPECTCOLE
## 1 1 1
## YEARS_SCHOOLING GNI_GROSSNATIONALINCOME empleo
## 1 1 1
# Case-level factor scores from the single-factor `alfalfa` model, as a data
# frame. Original author's note on this step: "the factorial does not come
# out for me in this one".
factorial_casos <- as.data.frame(alfalfa[["scores"]])
head(factorial_casos)
summary(factorial_casos)
## MR1
## Min. :-1.77100
## 1st Qu.:-0.78127
## Median :-0.01865
## Mean : 0.00000
## 3rd Qu.: 0.64947
## Max. : 1.99009
# Attach the factor scores to `Data` as 0-10 indices.
# `AJA` pairs the first column of `Data` with the two `mandarina` scores
# (MR1, MR2); each score is range-normalised to [0, 10].
AJA <- cbind(Data[1], as.data.frame(mandarina$scores))
Data$Gobernanza <- normalize(
  AJA$MR1,
  method = "range",
  range = c(0, 10),
  margin = 2
)
Data$Medidas_tempranas <- normalize(
  AJA$MR2,
  method = "range",
  range = c(0, 10),
  margin = 2
)

# Same treatment for the single `alfalfa` score (MR1).
EJE <- cbind(Data[1], as.data.frame(alfalfa$scores))
Data$estructural <- normalize(
  EJE$MR1,
  method = "range",
  range = c(0, 10),
  margin = 2
)

# The regression data set starts as a copy of `Data`, indices included.
data_regre <- Data
names(data_regre)
## [1] "Political_sta" "Voice_acco"
## [3] "Ruleoflaw" "Control_co"
## [5] "Regulatory_quality" "Tasadesempleo"
## [7] "Densidadpob" "Ayuda Económica"
## [9] "HDI" "EXPECTATIVAVIDA"
## [11] "EXPECTCOLE" "YEARS_SCHOOLING"
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"
## [15] "infoalawk" "Rigurosidad"
## [17] "GEE" "PPP_2018"
## [19] "Valor100" "pobla"
## [21] "Gobernanza" "Medidas_tempranas"
## [23] "estructural"
# Remove the `pobla` column from the regression data, then inspect the
# structure of what remains.
data_regre[["pobla"]] <- NULL
str(data_regre)
## 'data.frame': 125 obs. of 22 variables:
## $ Political_sta : num -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
## $ Voice_acco : num -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
## $ Ruleoflaw : num -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
## $ Control_co : num -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
## $ Regulatory_quality : num -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
## $ Tasadesempleo : int 24 7 14 4 2 8 6 6 5 7 ...
## $ Densidadpob : num 56.9 24.7 104.6 163.8 135.6 ...
## $ Ayuda Económica : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ HDI : num 0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 78.5 81.8 77.8 ...
## $ EXPECTCOLE : num 10.1 11.8 15.2 13.3 13.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.05 10.16 10.95 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 12300 48641 66912 ...
## $ Poburbana : num 25.8 66.2 61.2 88 86.8 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
## $ Rigurosidad : num 27.78 33.33 81.48 0 2.78 ...
## $ GEE : num -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
## $ PPP_2018 : num 524 3290 5284 41793 43839 ...
## $ Valor100 : num 0.043397 0.000814 0.058581 1.104457 0.166214 ...
## $ Gobernanza : num 0.322 2.142 5.145 8.675 6.801 ...
## $ Medidas_tempranas : num 2.724 3.006 6.989 0.979 1.085 ...
## $ estructural : num 1.32 1.86 5.23 7.13 7.69 ...
## - attr(*, "na.action")= 'omit' Named int [1:3] 39 107 112
## ..- attr(*, "names")= chr [1:3] "39" "116" "121"
# Give the regression data presentation-friendly column names. The names are
# assigned by position, so this vector must list all 22 columns in order.
names(data_regre) <- c(
  "Political stability", "Voice and accountability", "Rule of law",
  "Control Corruption", "Regulatory Quality", "Tasa de desempleo",
  "Densidad de la poblacion", "Ayuda economica", "IDH",
  "Expectativa de vida", "Expectativa de años de escolaridad",
  "Promedio de años de escolaridad", "Renta Nacional", "Poblacion urbana",
  "Campañas informativas", "Rigurosidad", "GEE", "PBI per capita",
  "Contagiados", "Gobernanza", "Medidas tempranas", "Estructural"
)
# First model. Terms are bare column names resolved through `data =`; writing
# `data_regre$col` inside the formula bypasses the `data` argument, which
# breaks predict(newdata = ...) and clutters the coefficient names.
# Non-syntactic names are quoted with backticks.
MINARISE <- Contagiados ~ Gobernanza + `Poblacion urbana` + `Renta Nacional` +
  `Expectativa de años de escolaridad`
MINARISEM <- lm(MINARISE, data = data_regre)
summary(MINARISEM)
##
## Call:
## lm(formula = MINARISE, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64222 -0.08302 -0.01803 0.05097 0.92788
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.228e-01 9.223e-02 3.500
## data_regre$Gobernanza -2.280e-02 1.135e-02 -2.008
## data_regre$`Poblacion urbana` 1.927e-03 1.112e-03 1.733
## data_regre$`Renta Nacional` 1.391e-05 1.431e-06 9.720
## data_regre$`Expectativa de años de escolaridad` -3.354e-02 9.385e-03 -3.573
## Pr(>|t|)
## (Intercept) 0.000654 ***
## data_regre$Gobernanza 0.046849 *
## data_regre$`Poblacion urbana` 0.085667 .
## data_regre$`Renta Nacional` < 2e-16 ***
## data_regre$`Expectativa de años de escolaridad` 0.000509 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1846 on 120 degrees of freedom
## Multiple R-squared: 0.5803, Adjusted R-squared: 0.5663
## F-statistic: 41.48 on 4 and 120 DF, p-value: < 2.2e-16
# Second model: the three factor-based indices plus urban population and
# population density. Terms are bare column names resolved through `data =`
# (the stray unary `+` after `~` and the `data_regre$` prefixes are removed;
# the prefixes bypass the `data` argument and break predict(newdata = ...)).
efe <- Contagiados ~ Gobernanza + `Medidas tempranas` + Estructural +
  `Poblacion urbana` + `Densidad de la poblacion`
afa <- lm(efe, data = data_regre)
summary(afa)
##
## Call:
## lm(formula = efe, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34773 -0.12268 -0.03390 0.06314 1.99284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.873e-01 8.584e-02 -2.182 0.0311 *
## data_regre$Gobernanza -1.110e-02 1.651e-02 -0.673 0.5026
## data_regre$`Medidas tempranas` -3.638e-03 1.160e-02 -0.314 0.7544
## data_regre$Estructural 3.688e-02 1.979e-02 1.864 0.0648 .
## data_regre$`Poblacion urbana` 3.248e-03 1.472e-03 2.206 0.0293 *
## data_regre$`Densidad de la poblacion` 1.609e-04 9.276e-05 1.734 0.0854 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2456 on 119 degrees of freedom
## Multiple R-squared: 0.2632, Adjusted R-squared: 0.2322
## F-statistic: 8.502 on 5 and 119 DF, p-value: 6.596e-07
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
# Compare the two fitted models with anova() and print the table as text.
# NOTE(review): MINARISEM and afa use different predictor sets (neither
# formula is a subset of the other), so the models do not appear to be nested;
# the printed table accordingly shows a negative "Sum of Sq" and no F or
# p-value. Confirm whether a criterion such as AIC/BIC or adjusted R2 should
# be reported instead.
Anovita <- anova(MINARISEM, afa)
stargazer(Anovita, type = 'text', summary = FALSE,
          title = "Table de Análisis de Varianza")
##
## Table de Análisis de Varianza
## =====================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -------------------------------------
## 1 120 4.088
## 2 119 7.176 1 -3.088
## -------------------------------------
stargazer(afa, MINARISEM, type='text')
##
## ===================================================================================
## Dependent variable:
## ----------------------------------------------
## Contagiados
## (1) (2)
## -----------------------------------------------------------------------------------
## Gobernanza -0.011 -0.023**
## (0.017) (0.011)
##
## `Medidas tempranas` -0.004
## (0.012)
##
## Estructural 0.037*
## (0.020)
##
## `Poblacion urbana` 0.003** 0.002*
## (0.001) (0.001)
##
## `Densidad de la poblacion` 0.0002*
## (0.0001)
##
## `Renta Nacional` 0.00001***
## (0.00000)
##
## `Expectativa de años de escolaridad` -0.034***
## (0.009)
##
## Constant -0.187** 0.323***
## (0.086) (0.092)
##
## -----------------------------------------------------------------------------------
## Observations 125 125
## R2 0.263 0.580
## Adjusted R2 0.232 0.566
## Residual Std. Error 0.246 (df = 119) 0.185 (df = 120)
## F Statistic 8.502*** (df = 5; 119) 41.477*** (df = 4; 120)
## ===================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
el_elegido = MINARISEM
MInarisa
library(ggpubr) #gráfico para ver normalidad
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(scatterplot3d)
library(stargazer)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Linearity: diagnostic plot 1 of the chosen model.
# Author's reading of the plot: "diagonal, almost linear".
plot(el_elegido, which = 1, main = "Gráfico 2: Linealidad")
B. Homocedasticidad.
# Homoscedasticity: diagnostic plot 3 of the chosen model (author's reading:
# "diagonal").
plot(el_elegido, which = 3, main = "Gráfico 3: Homocedasticidad")
# Breusch-Pagan test. Decision rule used by the author: a p-value above 0.05
# indicates homoscedasticity.
bptest(el_elegido)
##
## studentized Breusch-Pagan test
##
## data: el_elegido
## BP = 57.162, df = 4, p-value = 1.144e-11
C. Normalidad de residuos. Los puntos deben estar cerca de la diagonal.
# Normality of residuals: diagnostic plot 2 of the chosen model (author's
# reading: the points move away from the diagonal).
plot(el_elegido, which = 2, main = "Gráfico 4: Normalidad de residuos")
# Shapiro-Wilk test on the residuals. Decision rule used by the author: a
# p-value below 0.05 indicates the residuals are not normal.
shapiro.test(el_elegido$residuals)
##
## Shapiro-Wilk normality test
##
## data: el_elegido$residuals
## W = 0.83156, p-value = 1.211e-10
VIF(el_elegido)
## data_regre$Gobernanza
## 2.876856
## data_regre$`Poblacion urbana`
## 2.135327
## data_regre$`Renta Nacional`
## 2.759840
## data_regre$`Expectativa de años de escolaridad`
## 2.745941
5.2 Ver valores influyentes. Prestar atención al índice de Cook.
# Influential observations: diagnostic plot 5 of the chosen model.
plot(el_elegido, which = 5,
     main = "Gráfico 5: Identificación de valores influyentes")
# Logical flags from influence.measures(), one row per observation.
checkMINARISA <- as.data.frame(influence.measures(el_elegido)$is.inf)
## Warning in abbreviate(vn): abbreviate used with non-ASCII chars
# Keep the rows flagged by Cook's distance or by the hat value (the author
# noted rows 120 and 124).
with(checkMINARISA, checkMINARISA[cook.d | hat, ])
#data_regre