library(rio)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# URL of the CSSE time series of confirmed global COVID-19 cases.
data_covid <- "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

# Download the table, tag it with a constant `type` column, stack every
# remaining column into Fecha (column name) / Valor (cell value) pairs,
# and keep only the rows whose Valor is positive.
WorldData <- import(file = data_covid) %>%
  mutate(type = "datacon") %>%
  tidyr::gather(
    Fecha, Valor,
    -c(type, "Province/State", "Country/Region", Lat, Long)
  ) %>%
  filter(Valor > 0)

# Parse the month/day/year labels into Date values.
WorldData$Fecha <- as.Date(mdy(WorldData$Fecha))

Ponemos un nombre

# Rename the second column to "Country".
# NOTE(review): presumably column 2 is the "Country/Region" field at this
# point — confirm the column order of the reshaped table.
names(WorldData)[2]="Country"

Juntamos provincias en paises

# Collapse province-level rows: one total Valor per Country and Fecha.
WorldData <- aggregate(Valor ~ Country + Fecha, data = WorldData, FUN = sum)

Nombramos bien Egipto

WorldData$Country=gsub('Egypt',"Egypt, Arab Rep.",WorldData$Country)

Un paréntesis necesario para tener el código de cada país. Ahora necesitamos agregar el código a cada país y quedarnos con eso.

# File that also feeds the GEE variable later in the script.
link1 <- "https://github.com/CarlosGDiez/BasesLimpias/raw/master/Gee_sucio.csv"

# Country-name -> code lookup: first two columns of that file, renamed and
# with repeated rows removed.
oto <- import(link1)[, c(1, 2)]
names(oto) <- c("Country", "CODE")
oto <- oto[!duplicated(oto), ]

Calculamos el día 100

# Day-100 snapshot: for each country keep the row whose Fecha equals the
# 100th Fecha of that country's rows (countries with fewer than 100 rows
# drop out because the comparison is NA).
# NOTE(review): nth() uses the current row order — assumes rows within each
# country are in ascending date order; confirm.
Dia100 <- WorldData %>%
  dplyr::group_by(Country) %>%
  dplyr::mutate(dia100 = ifelse(Fecha == dplyr::nth(Fecha, 100), 1, 0)) %>%
  dplyr::filter(dia100 == 1) %>%
  dplyr::ungroup()

# Attach the country code from the lookup table.
Dia100 <- merge(oto, Dia100, by = "Country")
# Keep only the code, the date and the value (selected by name, not position).
Dia100 <- Dia100[, c("CODE", "Fecha", "Valor")]
# Give date and value their day-100 names.
names(Dia100)[2] <- "Fecha100"
names(Dia100)[3] <- "Valor100"
# Merge key "CODE date" used further down. The renamed column is referenced
# explicitly; the previous `Dia100$Fecha` only worked through `$` partial
# matching on the data frame.
Dia100$DIA100 <- paste(Dia100$CODE, Dia100$Fecha100)

Calculamos el día 7

# Day-7 snapshot: for each country keep the row whose Fecha equals the
# 7th Fecha of that country's rows (countries with fewer than 7 rows drop
# out because the comparison is NA).
# NOTE(review): nth() uses the current row order — assumes rows within each
# country are in ascending date order; confirm.
Dia7 <- WorldData %>%
  dplyr::group_by(Country) %>%
  dplyr::mutate(dia7 = ifelse(Fecha == dplyr::nth(Fecha, 7), 1, 0)) %>%
  dplyr::filter(dia7 == 1) %>%
  dplyr::ungroup()

# Attach the country code from the lookup table.
Dia7 <- merge(oto, Dia7, by = "Country")
# Keep only the code, the date and the value (selected by name, not position).
Dia7 <- Dia7[, c("CODE", "Fecha", "Valor")]
# Give date and value their day-7 names.
names(Dia7)[2] <- "Fecha7"
names(Dia7)[3] <- "Valor7"
# Merge key "CODE date" used by the day-7 joins further down. The renamed
# column is referenced explicitly; the previous `Dia7$Fecha` only worked
# through `$` partial matching on the data frame.
Dia7$DIA7 <- paste(Dia7$CODE, Dia7$Fecha7)

Ahora podemos tocar World data por separado

# Attach CODE to every case row, then discard the country name.
WorldData <- merge(oto, WorldData, by = "Country")
WorldData[["Country"]] <- NULL

library(BBmisc)

## 
## Attaching package: 'BBmisc'

## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse

## The following object is masked from 'package:base':
## 
##     isFALSE

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(cluster)
library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year

library(dbscan)
library(descr)
library(DescTools)

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:data.table':
## 
##     %like%

## The following object is masked from 'package:car':
## 
##     Recode

## The following object is masked from 'package:BBmisc':
## 
##     %nin%

library(foreign)
library(fpc)

## 
## Attaching package: 'fpc'

## The following object is masked from 'package:dbscan':
## 
##     dbscan

library(ggcorrplot)

## Loading required package: ggplot2

library(GPArotation)
library(haven)
library(htmltab)
library(jsonlite)
library(matrixcalc)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:rio':
## 
##     export

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(PMCMRplus)
library(polycor)
library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:polycor':
## 
##     polyserial

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following objects are masked from 'package:DescTools':
## 
##     AUC, ICC, SD

## The following object is masked from 'package:car':
## 
##     logit

library(readr) 
library(readxl)
library(stringi)
library(stringr)
library(tidyr)
library(tidyverse)

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## Found more than one class "atomicVector" in cache; using the first, from namespace 'Matrix'

## Also defined by 'Rmpfr'

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ tibble  3.0.4     ✓ forcats 0.5.0
## ✓ purrr   0.3.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x psych::%+%()             masks ggplot2::%+%()
## x psych::alpha()           masks ggplot2::alpha()
## x lubridate::as.difftime() masks base::as.difftime()
## x data.table::between()    masks dplyr::between()
## x BBmisc::coalesce()       masks dplyr::coalesce()
## x BBmisc::collapse()       masks dplyr::collapse()
## x lubridate::date()        masks base::date()
## x plotly::filter()         masks dplyr::filter(), stats::filter()
## x data.table::first()      masks dplyr::first()
## x purrr::flatten()         masks jsonlite::flatten()
## x data.table::hour()       masks lubridate::hour()
## x lubridate::intersect()   masks base::intersect()
## x data.table::isoweek()    masks lubridate::isoweek()
## x dplyr::lag()             masks stats::lag()
## x data.table::last()       masks dplyr::last()
## x data.table::mday()       masks lubridate::mday()
## x data.table::minute()     masks lubridate::minute()
## x data.table::month()      masks lubridate::month()
## x data.table::quarter()    masks lubridate::quarter()
## x car::recode()            masks dplyr::recode()
## x data.table::second()     masks lubridate::second()
## x lubridate::setdiff()     masks base::setdiff()
## x purrr::some()            masks car::some()
## x purrr::transpose()       masks data.table::transpose()
## x lubridate::union()       masks base::union()
## x data.table::wday()       masks lubridate::wday()
## x data.table::week()       masks lubridate::week()
## x data.table::yday()       masks lubridate::yday()
## x data.table::year()       masks lubridate::year()

library(Rmisc)

## Loading required package: lattice

## Loading required package: plyr

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:purrr':
## 
##     compact

## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

#Población Traemos la data de población en cada país

linkedin = "https://github.com/AriannaNKZC/Estad-2/raw/master/%C2%BFSera%20la%20data%3F.xls"
poblacion = import(linkedin)

Nos quedamos con las columnas que nos sirven

poblacion = poblacion[,c(1,2,64)]

Le ponemos nombre

names(poblacion)= c("Country", "CODE", "pobla")

Ahora combinamos las datas de contagios y las de población

WorldData=merge(poblacion,WorldData, by.x = 'CODE', by.y='CODE')

AHORA TRABAJAMOS LAS VARIABLES INDEPENDIENTES

Un paréntesis necesario para tener el código de cada país en español. Ahora necesitamos agregar el código a cada país y quedarnos con eso. Traemos la base de datos.

CODESPAÑOL<- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
CDSP=import(CODESPAÑOL)

Nos quedamos con las columnas y filas que nos sirven

# Promote the first row to column names, then keep the remaining rows and
# only the first two columns.
names(CDSP) <- CDSP[1, ]
CDSP <- CDSP[-1, c(1, 2)]

Le ponemos nombres

names(CDSP) = c("PAIS", "CODE")

Primera variable: PBI PER CAPITA por precio de dolar actual (2018)

Traemos la data

data_ppp <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_NY.GDP.PCAP.CD_DS2_es_csv_v2_1347337.csv"
ppp_pib =import(data_ppp)

Nos quedamos con las filas y columnas que nos sirven

# Promote the first row to column names, then keep the remaining rows and
# only columns 2 and 63.
names(ppp_pib) <- ppp_pib[1, ]
ppp_pib <- ppp_pib[-1, c(2, 63)]

Le ponemos nombres

names(ppp_pib) = c("CODE", "PPP_2018")

#Segunda variable: Government Effectiveness Estimate (Índice de la Efectividad de la Gobernanza). Traemos la data (LA MISMA QUE SE USÓ PARA CREAR A OTO)

GEE=import(link1)

Le ponemos nombres

names(GEE) = c("Country","CODE","Series", "SC", "GEE")

Nos quedamos con las filas y columnas que nos sirven

# Keep, for each country, only the rows whose Series equals that country's
# first-listed Series (per the original note, this selects the GEE value
# rather than its standard error).
# The dplyr verbs are namespace-qualified because plyr is attached after
# dplyr earlier in this script and masks mutate(); plyr's mutate() is not
# group-aware, so the unqualified call ignored group_by().
GEE <- GEE %>%
  dplyr::group_by(Country) %>%
  dplyr::mutate(Index = ifelse(Series == dplyr::nth(Series, 1), 1, 0)) %>%
  dplyr::filter(Index == 1) %>%
  dplyr::ungroup()

# Remove the empty rows by content instead of by fixed position: at least
# some of the hard-coded indices 215:219 were out of range (the tibble
# warning this step used to raise). A row is treated as empty when its CODE
# is missing or blank.
# NOTE(review): assumes the empty rows are exactly those without a CODE —
# confirm against the source file.
GEE <- GEE[!(is.na(GEE$CODE) | trimws(GEE$CODE) == ""), ]

## Warning: The `i` argument of ``[.tbl_df`()` must lie in [-rows, 0] if negative, as of tibble 3.0.0.
## Use `NA_integer_` as row index to obtain a row full of `NA` values.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

#Columnas necesarias
GEE = GEE[,c(2,5)]

#Tercera variable: Índice de rigurosidad al séptimo día de contagio. Traemos la data

link2="https://github.com/CarlosGDiez/BasesLimpias/blob/master/Rigurosidad.csv?raw=true"
Rigurosidad=import(link2)

Nos quedamos con las filas y columnas que nos sirven

Rigurosidad=Rigurosidad[, c(1,2,5,35)]

Les ponemos nombres

names(Rigurosidad) = c("Country", "CODE","Date","Rigurosidad")

Hay que ordenarlos y juntarlos por fechas

Rigurosidad$Date <- ymd(Rigurosidad$Date)

Creamos variables mergeables

Rigurosidad$DIA7=paste(Rigurosidad$CODE,Rigurosidad$Date)

Nos quedamos solo con la información a la semana de contagios

Rigurosidad=merge(Rigurosidad,Dia7, by.x="DIA7", by.y = "DIA7")

Una vez más, nos quedamos con las filas y columnas que nos sirven

Rigurosidad=Rigurosidad[,c(3,5)]

Nombramos bien las columnas

names(Rigurosidad) = c("CODE","Rigurosidad")

#Cuarta variable: Campañas informativas al séptimo día de contagio. Traemos la data

infocamp = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/public-campaigns-covid.csv"
dataic=import(infocamp)

Hay que ordenarlos y juntarlos por fechas

dataic$Date <- ymd(dataic$Date)

Creamos variables mergeables

dataic$DIA7=paste(dataic$Code,dataic$Date)

Nos quedamos solo con la información a la semana de contagios

dataic=merge(dataic,Dia7, by.x="DIA7", by.y = "DIA7")

Una vez más, nos quedamos con las filas y columnas que nos sirven

dataic=dataic[,c(5,6)]

#Quinta variable: Población urbana. Traemos la data

xurb = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/API_SP.URB.TOTL.IN.ZS_DS2_es_csv_v2_1347951.csv"
dataxurb=import(xurb)

Reacomodamos el nombre de las columnas

names(dataxurb)=(dataxurb[1,])

Nos quedamos con las columnas y filas que nos sirven

# Drop the first row (it was used as the header) and keep columns 2 and 64.
dataxurb <- dataxurb[-1, c(2, 64)]

Nombramos bien las columnas

names(dataxurb) = c("CODE","Poburbana")

Nombramos bien las filas

# Renumber the rows 1..n. The count is taken from the data instead of the
# previous hard-coded 1:264, which failed whenever the table did not have
# exactly 264 rows; no temporary column is needed.
rownames(dataxurb) <- seq_len(nrow(dataxurb))

Redondeamos

dataxurb$Poburbana=round(dataxurb$Poburbana, digits = 2)

#Sexta variable:Índice de Desarrollo Humano (Human Development Index), indicador que integra las variables PBI, Educación y Esperanza de vida Traemos la data

LIDH="https://github.com/CarlaMendozaE/Prueba/blob/master/IDH.xlsx?raw=true"
IDH=import(LIDH)

Nos quedamos con las filas y columnas que nos sirven

IDH[,c(1,8,9)]=NULL

Ponemos nombres

names(IDH) = c("Country","HDI","EXPECTATIVAVIDA","EXPECTCOLE","YEARS_SCHOOLING","GNI_GROSSNATIONALINCOME")
IDH$Country=gsub('Egypt',"Egypt, Arab Rep.",IDH$Country)

Convertimos a numéricas

IDH[,c(2:6)]=lapply(IDH[,c(2:6)], as.numeric)

## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, c(2:6)], as.numeric): NAs introduced by coercion

Redondeamos

IDH[2:6]=round(IDH[,2:6], digits = 2)

Agregamos CODE

IDH=merge(oto,IDH, by.x = 'Country', by.y='Country')

#Séptima variable: Ayuda económica Traemos la data

linkayuda="https://raw.githubusercontent.com/CarlosGDiez/BasesLimpias/master/Rigurosidad.csv"
dataayuda=import(linkayuda)

Nos quedamos con las filas y columnas que nos sirven

# Keep only columns 2, 5 and 21 of the imported table (they are named CODE,
# Date and "Ayuda Económica" further down).
dataayuda = dataayuda[,c(2,5, 21)]
# USA: remove rows 48601-62640 by position.
# NOTE(review): presumably these are the sub-national US records; the range
# is hard-coded, so confirm it against the current file.
dataayuda <- dataayuda[-c(48601 :62640), ]
# UK: remove rows 16741-17820 by position (applied after the USA removal).
# NOTE(review): presumably the sub-national UK records — confirm.
dataayuda <- dataayuda[-c(16741 :17820), ]

Les ponemos nombres

names(dataayuda) = c("CODE","Date","Ayuda Económica")

Hay que ordenarlos y juntarlos por fechas

dataayuda$Date <- ymd(dataayuda$Date)

Creamos variables mergeables

dataayuda$DIA7=paste(dataayuda$CODE,dataayuda$Date)

Nos quedamos solo con la información a la semana de contagios

dataayuda=merge(dataayuda,Dia7, by.x="DIA7", by.y = "DIA7")

Una vez más, nos quedamos con las filas y columnas que nos sirven

dataayuda = dataayuda[,c(2,4)]

Nombramos bien CODE

names(dataayuda)[1] = "CODE"

#Octava variable: Densidad de la población Traemos la data

linkdensidad="https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/Excel%20densidad.xlsx.xls"
datadensidad=import(linkdensidad)

## New names:
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...

Reacomodamos el nombre de las columnas

names(datadensidad)=(datadensidad[3,])

Nos quedamos con las filas y columnas que nos sirven

datadensidad = datadensidad[,c(2, 63)]
datadensidad <- datadensidad[-c(1:3),]

Ponemos nombres

names(datadensidad) = c("CODE","Densidadpob")

Convertimos a numéricas

datadensidad$Densidadpob=as.numeric(datadensidad$Densidadpob)

Redondeamos

datadensidad$Densidadpob=round(datadensidad$Densidadpob, digits = 2)

#Novena variable: Tasa de desempleo Traemos la data

# Download the unemployment table straight into `datadesempleo`.
datadesempleo <- import(
  "https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/datadesempleooriginal.csv"
)

Le ponemos nombre

names(datadesempleo) <- c("PAIS", "Tasadesempleo")

# Harmonise country spellings so PAIS matches the names in the code lookup
# it is merged with next. Each pattern -> replacement pair is applied with
# gsub(), one after another, in the order listed.
datadesempleo$PAIS <- local({
  renombres <- c(
    "Egipto" = "Egipto, República Árabe de",
    "Benín" = "Benin",
    "Bahráin" = "Bahrein",
    "Bosnia y Hercegovina" = "Bosnia y Herzegovina",
    "Bután" = "Bhután",
    "Botsuana" = "Botswana",
    "Kazajistán" = "Kazajstán",
    "Kenia" = "Kenya",
    "Lesoto" = "Lesotho",
    "Malaui" = "Malawi",
    "Nueva Zelanda" = "Nueva Zelandia",
    "Ruanda" = "Rwanda",
    "Arabia Saudí" = "Arabia Saudita",
    "Surinam" = "Suriname",
    "Zimbabue" = "Zimbabwe"
  )
  Reduce(
    function(texto, k) gsub(names(renombres)[k], renombres[[k]], texto),
    seq_along(renombres),
    datadesempleo$PAIS
  )
})

Agregamos CODE

datadesempleo=merge(CDSP,datadesempleo, by.x = 'PAIS', by.y='PAIS')

#Décima variable: Regulatory quality Traemos la data

perro = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/258c45e7-1b68-4b8e-853d-a2554f1bb145_Data.csv"
regulatory = import(perro)

Nos quedamos con las filas y columnas que nos sirven

regulatory=regulatory[, c(2,5)]

Ponemos nombres

names(regulatory) = c("CODE","Regulatory_quality")

Convertimos a numéricas

regulatory$Regulatory_quality=as.numeric(regulatory$Regulatory_quality)

## Warning: NAs introduced by coercion

Redondeamos

regulatory$Regulatory_quality=round(regulatory$Regulatory_quality, digits = 2)

#Undécima variable: Control de la corrupción Traemos la data

gato= "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/51253f2e-7374-408f-8685-c729a64d043a_Data.csv"
control_co = import(gato)

Nos quedamos con las filas y columnas que nos sirven

control_co=control_co[, c(2,5)]

Ponemos nombres

names(control_co) = c("CODE","Control_co")

Convertimos a numéricas

control_co$Control_co=as.numeric(control_co$Control_co)

## Warning: NAs introduced by coercion

Redondeamos

control_co$Control_co=round(control_co$Control_co, digits = 2)

#Duodécima variable: Rule of law Traemos la data

AXA = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/a9249c7d-95ab-4618-9160-3a247dea2bae_Data.csv"
ruleof = import(AXA)

Nos quedamos con las filas y columnas que nos sirven

ruleof=ruleof[, c(2,5)]

Ponemos nombres

names(ruleof) = c("CODE","Ruleoflaw")

Convertimos a numéricas

ruleof$Ruleoflaw=as.numeric(ruleof$Ruleoflaw)

## Warning: NAs introduced by coercion

Redondeamos

ruleof$Ruleoflaw=round(ruleof$Ruleoflaw, digits = 2)

#Decimotercera variable: Voice and accountability. Traemos la data

VA = 'https://github.com/AriannaNKZC/Estad-2/raw/master/Voice_and_accountability.csv'
VocA = import(VA)

Nos quedamos con las filas y columnas que nos sirven

VocA=VocA[, c(2,5)]

Ponemos nombres

names(VocA) = c("CODE","Voice_acco")

Convertimos a numéricas

VocA$Voice_acco=as.numeric(VocA$Voice_acco)

## Warning: NAs introduced by coercion

Redondeamos

VocA$Voice_acco=round(VocA$Voice_acco, digits = 2)

#Decimocuarta variable: Political stability. Traemos la data

PS='https://github.com/AriannaNKZC/Estad-2/raw/master/e0757e7a-8829-44d2-a7a3-11a580c19a53_Data.csv'
PolS = import(PS)

Nos quedamos con las filas y columnas que nos sirven

PolS=PolS[, c(2,5)]

Ponemos nombres

names(PolS) = c("CODE","Political_sta")

Convertimos a numéricas

PolS$Political_sta=as.numeric(PolS$Political_sta)

## Warning: NAs introduced by coercion

Redondeamos

PolS$Political_sta=round(PolS$Political_sta, digits = 2)

MERGEAMOS TODAS LAS VARIABLES EN UN SOLO DATAFRAME

# Fold every per-variable table into a single data frame, joining on CODE
# one table at a time in the listed order (merge() defaults, so only codes
# present in every table survive).
Data <- Reduce(
  function(acumulado, siguiente) merge(acumulado, siguiente, by = "CODE"),
  list(
    PolS, VocA, ruleof, control_co, regulatory, datadesempleo, datadensidad,
    dataayuda, IDH, dataxurb, dataic, Rigurosidad, GEE, ppp_pib, Dia100,
    poblacion
  )
)

Limpiamos

#Eliminamos columnas
names(Data)

##  [1] "CODE"                                
##  [2] "Political_sta"                       
##  [3] "Voice_acco"                          
##  [4] "Ruleoflaw"                           
##  [5] "Control_co"                          
##  [6] "Regulatory_quality"                  
##  [7] "PAIS"                                
##  [8] "Tasadesempleo"                       
##  [9] "Densidadpob"                         
## [10] "Ayuda Económica"                     
## [11] "Country.x"                           
## [12] "HDI"                                 
## [13] "EXPECTATIVAVIDA"                     
## [14] "EXPECTCOLE"                          
## [15] "YEARS_SCHOOLING"                     
## [16] "GNI_GROSSNATIONALINCOME"             
## [17] "Poburbana"                           
## [18] "Public information campaigns (OxBSG)"
## [19] "Rigurosidad"                         
## [20] "GEE"                                 
## [21] "PPP_2018"                            
## [22] "Fecha100"                            
## [23] "Valor100"                            
## [24] "DIA100"                              
## [25] "Country.y"                           
## [26] "pobla"

# Drop the helper columns by name rather than by position (previously
# columns 7, 25 and 24 of the listing printed above), so the step does not
# silently remove the wrong columns if an upstream table changes shape.
Data <- Data[, !(names(Data) %in% c("PAIS", "Country.y", "DIA100")), drop = FALSE]
# Remove fully duplicated rows.
Data <- Data[!duplicated(Data), ]

Nombramos bien

# Rename by current column name instead of by position (previously columns
# 10 and 17), so the renames cannot land on the wrong column.
names(Data)[names(Data) == "Country.x"] <- "Country"
names(Data)[names(Data) == "Public information campaigns (OxBSG)"] <- "infoalawk"

Arreglamos numérica

Data$GEE=as.numeric(Data$GEE)

## Warning: NAs introduced by coercion

Redondeamos

Data$GEE=round(Data$GEE, digits = 2)
Data$PPP_2018=round(Data$PPP_2018, digits = 2)

Arreglamos ordinales

# Economic support: convert to an ordered factor and relabel its levels.
# The two labels assume exactly two observed levels; `levels<-` errors if
# the factor has more levels than labels supplied.
Data$`Ayuda Económica`= as.ordered(Data$`Ayuda Económica`)
levels(Data$`Ayuda Económica`) = c("Sin apoyo", "Menos del 50% del sueldo")
# Frequency of each level.
table(Data$`Ayuda Económica`)

## 
##                Sin apoyo Menos del 50% del sueldo 
##                      121                        8

# Information campaigns: convert to an ordered factor and relabel its levels.
# The three labels assume exactly three observed levels; `levels<-` errors if
# the factor has more levels than labels supplied.
Data$infoalawk = as.ordered(Data$infoalawk)
levels(Data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
# Frequency of each level.
table(Data$infoalawk)

## 
##               Ninguna Campañas del gobierno   Campañas integrales 
##                    17                    19                    93

Eliminamos na’s

Data=na.omit(Data)

ANÁLISIS BIVARIADO ##########################

names(Data)

##  [1] "CODE"                    "Political_sta"          
##  [3] "Voice_acco"              "Ruleoflaw"              
##  [5] "Control_co"              "Regulatory_quality"     
##  [7] "Tasadesempleo"           "Densidadpob"            
##  [9] "Ayuda Económica"         "Country"                
## [11] "HDI"                     "EXPECTATIVAVIDA"        
## [13] "EXPECTCOLE"              "YEARS_SCHOOLING"        
## [15] "GNI_GROSSNATIONALINCOME" "Poburbana"              
## [17] "infoalawk"               "Rigurosidad"            
## [19] "GEE"                     "PPP_2018"               
## [21] "Fecha100"                "Valor100"               
## [23] "pobla"

# Express the day-100 case count as a percentage of the population.
Data$Valor100 <- (Data$Valor100 / Data$pobla) * 100
# Use the country name as the row label, then drop the identifier and date
# columns.
rownames(Data) <- Data$Country
Data[c("Country", "CODE", "Fecha100")] <- NULL

#abr

Calculemos matriz de correlación:

names(Data)

##  [1] "Political_sta"           "Voice_acco"             
##  [3] "Ruleoflaw"               "Control_co"             
##  [5] "Regulatory_quality"      "Tasadesempleo"          
##  [7] "Densidadpob"             "Ayuda Económica"        
##  [9] "HDI"                     "EXPECTATIVAVIDA"        
## [11] "EXPECTCOLE"              "YEARS_SCHOOLING"        
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"              
## [15] "infoalawk"               "Rigurosidad"            
## [17] "GEE"                     "PPP_2018"               
## [19] "Valor100"                "pobla"

# Keep rows 1-106 and 108-126, i.e. skip row 107.
# NOTE(review): the script gives no reason for excluding that row — confirm.
Data <- Data[c(1:106, 108:126), ]

# Working subset for the first factor analysis: columns 1-5, 8 and 15-17.
theData <- Data[, c(1:5, 8, 15:17)]

table(theData$`Ayuda Económica`)

## 
##                Sin apoyo Menos del 50% del sueldo 
##                      118                        7

#theData$Voice_acco = NULL
str(theData)

## 'data.frame':    125 obs. of  9 variables:
##  $ Political_sta     : num  -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
##  $ Voice_acco        : num  -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
##  $ Ruleoflaw         : num  -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
##  $ Control_co        : num  -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
##  $ Regulatory_quality: num  -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
##  $ Ayuda Económica   : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
##  $ infoalawk         : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
##  $ Rigurosidad       : num  27.78 33.33 81.48 0 2.78 ...
##  $ GEE               : num  -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...

#cambiando a nombres más bonitos

lapiz=polycor::hetcor(theData)$correlations

Explorar correlaciones:

ggcorrplot(lapiz)

# Significance check: blank out the non-significant correlations.
# cor_pmat() must receive the observations (one row per case), not the
# correlation matrix; passing `lapiz` tested the matrix's own columns as if
# they were data. The ordered factors are passed as integer codes via
# data.matrix(), so their p-values come from Pearson tests on the codes —
# an approximation to the heterogeneous correlations being plotted.
ggcorrplot(lapiz,
           p.mat = cor_pmat(data.matrix(theData)),
           insig = "blank",
           title = "Gráfico 1: Matriz de correlación")

verificar si los datos se pueden factorizar

psych::KMO(lapiz)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = lapiz)
## Overall MSA =  0.82
## MSA for each item = 
##      Political_sta         Voice_acco          Ruleoflaw         Control_co 
##               0.89               0.89               0.84               0.83 
## Regulatory_quality    Ayuda Económica          infoalawk        Rigurosidad 
##               0.81               0.33               0.55               0.74 
##                GEE 
##               0.86

Verificar si la matriz de correlaciones es adecuada

cortest.bartlett(lapiz,n=nrow(theData))$p.value>0.05

## [1] FALSE

library(matrixcalc)
is.singular.matrix(lapiz)

## [1] FALSE

Determinar en cuántos factores o variables latentes podríamos redimensionar la data

theData$`Ayuda Económica` = as.numeric(theData$`Ayuda Económica`)
theData$infoalawk = as.numeric(theData$infoalawk)
fa.parallel(theData, fm = 'ML', fa = 'fa')

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

Redimensionar a un número menor de factores

Resultado inicial:

mandarina <- fa(theData,nfactors = 2,cor = 'mixed',rotate ="varimax",fm="minres")

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## mixed.cor is deprecated, please use mixedCor.
print(mandarina$loadings)

## 
## Loadings:
##                    MR1    MR2   
## Political_sta       0.835       
## Voice_acco          0.814       
## Ruleoflaw           0.962 -0.213
## Control_co          0.945 -0.130
## Regulatory_quality  0.925 -0.176
## Ayuda Económica            0.482
## infoalawk                  0.484
## Rigurosidad        -0.247  0.976
## GEE                 0.931 -0.254
## 
##                  MR1   MR2
## SS loadings    4.969 1.578
## Proportion Var 0.552 0.175
## Cumulative Var 0.552 0.727

resultado visual

fa.diagram(mandarina, main = c("Gráfico 2: Árbol de factorización del primer modelo"))

Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?

mandarina$crms

## [1] 0.03800651

¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?

mandarina$RMSEA

##      RMSEA      lower      upper confidence 
##  0.1715892  0.1373051  0.2093756  0.9000000

¿El índice de Tucker-Lewis es mayor a 0.9?

mandarina$TLI

## [1] 0.8893754

¿Qué variables aportaron más a los factores?

sort(mandarina$communality)

##    Ayuda Económica          infoalawk         Voice_acco      Political_sta 
##          0.2320900          0.2409486          0.6618985          0.6985701 
## Regulatory_quality         Control_co                GEE          Ruleoflaw 
##          0.8869448          0.9106537          0.9306910          0.9711806 
##        Rigurosidad 
##          1.0142999

¿Qué variables contribuyen a más de un factor? #conviene que salga 1

sort(mandarina$complexity)

##         Voice_acco    Ayuda Económica      Political_sta         Control_co 
##           1.000006           1.000738           1.002742           1.037732 
##          infoalawk Regulatory_quality          Ruleoflaw        Rigurosidad 
##           1.056320           1.071947           1.097528           1.127458 
##                GEE 
##           1.147903

factorial_casos<-as.data.frame(mandarina$scores) #en esta no me sale el factorial
head(factorial_casos)

summary(factorial_casos)

##       MR1               MR2         
##  Min.   :-2.0138   Min.   :-1.7851  
##  1st Qu.:-0.6831   1st Qu.:-0.7623  
##  Median :-0.2090   Median :-0.2188  
##  Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.7056   3rd Qu.: 0.6276  
##  Max.   : 1.9846   Max.   : 3.2463

estructural

#factor estructural

Calculemos matriz de correlación:

demo = Data
names(demo)

##  [1] "Political_sta"           "Voice_acco"             
##  [3] "Ruleoflaw"               "Control_co"             
##  [5] "Regulatory_quality"      "Tasadesempleo"          
##  [7] "Densidadpob"             "Ayuda Económica"        
##  [9] "HDI"                     "EXPECTATIVAVIDA"        
## [11] "EXPECTCOLE"              "YEARS_SCHOOLING"        
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"              
## [15] "infoalawk"               "Rigurosidad"            
## [17] "GEE"                     "PPP_2018"               
## [19] "Valor100"                "pobla"

demo = (Data[, c(10:13, 18, 6)])
str(demo)

## 'data.frame':    125 obs. of  6 variables:
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 78.5 81.8 77.8 ...
##  $ EXPECTCOLE             : num  10.1 11.8 15.2 13.3 13.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.05 10.16 10.95 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 12300 48641 66912 ...
##  $ PPP_2018               : num  524 3290 5284 41793 43839 ...
##  $ Tasadesempleo          : int  24 7 14 4 2 8 6 6 5 7 ...

demo$empleo = 100 - (demo$Tasadesempleo)
head(demo)

demo$Tasadesempleo = NULL

#cambiando a nombres más bonitos

pinguino=polycor::hetcor(demo)$correlations

Explorar correlaciones:

# Plain view of the correlation matrix.
ggcorrplot(pinguino)

# Significance view: correlations that are not significant are blanked out.
# The p-value matrix has to be computed from the raw observations in `demo`;
# passing the correlation matrix itself would test correlations between the
# columns of that 6 x 6 matrix instead of between the variables.
ggcorrplot(pinguino,
          p.mat = cor_pmat(demo),
          insig = "blank",
          title = "Gráfico 1: Matriz de correlación")

verificar si los datos se pueden factorizar

# Kaiser-Meyer-Olkin sampling adequacy computed on the correlation matrix
# (overall MSA plus one MSA per variable).
psych::KMO(pinguino)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = pinguino)
## Overall MSA =  0.83
## MSA for each item = 
##         EXPECTATIVAVIDA              EXPECTCOLE         YEARS_SCHOOLING 
##                    0.89                    0.84                    0.87 
## GNI_GROSSNATIONALINCOME                PPP_2018                  empleo 
##                    0.77                    0.77                    0.93

Verificar si la matriz de correlaciones es adecuada

# Bartlett test on the correlation matrix; TRUE here would mean the p-value
# exceeds 0.05.
psych::cortest.bartlett(pinguino, n = nrow(demo))$p.value > 0.05

## [1] FALSE

library(matrixcalc)
# Check whether the correlation matrix is singular.
matrixcalc::is.singular.matrix(pinguino)

## [1] FALSE

Determinar en cuántos factores o variables latentes podríamos redimensionar la data

# Parallel analysis on the selected variables to suggest how many factors
# to extract (maximum-likelihood factoring, factors only).
fa.parallel(demo, fm = "ML", fa = "fa")

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

Redimensionar a un número menor de factores #HACIENDO FACTORES FORZOSOS

Resultado inicial:

# Fit a forced single-factor solution on the selected variables.
alfalfa <- fa(
  demo,
  nfactors = 1,
  cor = "mixed",
  rotate = "varimax",
  fm = "minres"
)

## mixed.cor is deprecated, please use mixedCor.
# Show the loadings, hiding those whose absolute value is below 0.5.
print(alfalfa$loadings, cutoff = 0.5)

## 
## Loadings:
##                         MR1  
## EXPECTATIVAVIDA         0.860
## EXPECTCOLE              0.856
## YEARS_SCHOOLING         0.863
## GNI_GROSSNATIONALINCOME 0.842
## PPP_2018                0.822
## empleo                       
## 
##                  MR1
## SS loadings    3.724
## Proportion Var 0.621

resultado visual

# Draw the factor diagram of the single-factor solution.
fa.diagram(alfalfa,
           main = "Gráfico 2: Árbol de factorización del primer modelo")

Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?

# Corrected root mean square of the residuals for the fitted factor model.
alfalfa[["crms"]]

## [1] 0.09278042

¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?

# RMSEA estimate together with its confidence bounds.
alfalfa[["RMSEA"]]

##      RMSEA      lower      upper confidence 
##  0.2876571  0.2400340  0.3406230  0.9000000

¿El índice de Tucker-Lewis es mayor a 0.9?

# Tucker-Lewis index of the fitted factor model.
alfalfa[["TLI"]]

## [1] 0.719773

¿Qué variables aportaron más a los factores?

# Communalities in ascending order: the variables contributing least come first.
sort(alfalfa$communality, decreasing = FALSE)

##                  empleo                PPP_2018 GNI_GROSSNATIONALINCOME 
##               0.1212575               0.6749885               0.7091592 
##              EXPECTCOLE         EXPECTATIVAVIDA         YEARS_SCHOOLING 
##               0.7333696               0.7403700               0.7451315

¿Qué variables contribuyen a más de un factor? #conviene que salga 1

# Item complexity in ascending order; a value of 1 means the variable loads
# on a single factor.
sort(alfalfa$complexity, decreasing = FALSE)

##                PPP_2018         EXPECTATIVAVIDA              EXPECTCOLE 
##                       1                       1                       1 
##         YEARS_SCHOOLING GNI_GROSSNATIONALINCOME                  empleo 
##                       1                       1                       1

# Collect the per-case scores of the single-factor solution as a data frame
# and inspect them (author's note: the factor scores did not show up as
# expected in this step).
factorial_casos <- as.data.frame(alfalfa[["scores"]])
head(factorial_casos)

summary(factorial_casos)

##       MR1          
##  Min.   :-1.77100  
##  1st Qu.:-0.78127  
##  Median :-0.01865  
##  Mean   : 0.00000  
##  3rd Qu.: 0.64947  
##  Max.   : 1.99009

Agregándolo a la data

# Scores from `mandarina` (columns MR1 and MR2) placed next to the first
# column of Data.
AJA <- cbind(Data[1], as.data.frame(mandarina$scores))

# Store both score columns in Data, rescaled to the 0-10 range.
Data$Gobernanza <- normalize(
  AJA$MR1,
  method = "range",
  margin = 2, # by column
  range = c(0, 10)
)
Data$Medidas_tempranas <- normalize(
  AJA$MR2,
  method = "range",
  margin = 2, # by column
  range = c(0, 10)
)

# Same procedure for the single-factor `alfalfa` solution.
EJE <- cbind(Data[1], as.data.frame(alfalfa$scores))

Data$estructural <- normalize(
  EJE$MR1,
  method = "range",
  margin = 2, # by column
  range = c(0, 10)
)

agrega

# Work on a separate copy of the data for the regression models and list
# its columns.
data_regre <- Data
names(data_regre)

##  [1] "Political_sta"           "Voice_acco"             
##  [3] "Ruleoflaw"               "Control_co"             
##  [5] "Regulatory_quality"      "Tasadesempleo"          
##  [7] "Densidadpob"             "Ayuda Económica"        
##  [9] "HDI"                     "EXPECTATIVAVIDA"        
## [11] "EXPECTCOLE"              "YEARS_SCHOOLING"        
## [13] "GNI_GROSSNATIONALINCOME" "Poburbana"              
## [15] "infoalawk"               "Rigurosidad"            
## [17] "GEE"                     "PPP_2018"               
## [19] "Valor100"                "pobla"                  
## [21] "Gobernanza"              "Medidas_tempranas"      
## [23] "estructural"

# Remove the `pobla` column from the regression data and review the
# remaining structure.
data_regre[["pobla"]] <- NULL
str(data_regre)

## 'data.frame':    125 obs. of  22 variables:
##  $ Political_sta          : num  -2.65 -0.31 0.12 1.62 0.7 -0.12 1.09 0.98 -0.68 0.48 ...
##  $ Voice_acco             : num  -0.99 -0.78 0.15 1.14 -1.12 0.6 1.32 1.33 -1.49 1.37 ...
##  $ Ruleoflaw              : num  -1.71 -1.05 -0.41 1.58 0.84 -0.43 1.73 1.88 -0.58 1.36 ...
##  $ Control_co             : num  -1.4 -1.05 -0.53 1.23 1.11 -0.07 1.81 1.55 -0.87 1.55 ...
##  $ Regulatory_quality     : num  -1.12 -0.89 0.27 1.23 0.98 -0.49 1.87 1.46 -0.23 1.29 ...
##  $ Tasadesempleo          : int  24 7 14 4 2 8 6 6 5 7 ...
##  $ Densidadpob            : num  56.9 24.7 104.6 163.8 135.6 ...
##  $ Ayuda Económica        : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
##  $ HDI                    : num  0.5 0.57 0.79 0.86 0.87 0.83 0.94 0.91 0.75 0.92 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 78.5 81.8 77.8 ...
##  $ EXPECTCOLE             : num  10.1 11.8 15.2 13.3 13.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.05 10.16 10.95 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 12300 48641 66912 ...
##  $ Poburbana              : num  25.8 66.2 61.2 88 86.8 ...
##  $ infoalawk              : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 3 1 1 3 3 3 3 3 ...
##  $ Rigurosidad            : num  27.78 33.33 81.48 0 2.78 ...
##  $ GEE                    : num  -1.46 -1.05 0.11 1.94 1.43 0.03 1.6 1.45 -0.1 1.17 ...
##  $ PPP_2018               : num  524 3290 5284 41793 43839 ...
##  $ Valor100               : num  0.043397 0.000814 0.058581 1.104457 0.166214 ...
##  $ Gobernanza             : num  0.322 2.142 5.145 8.675 6.801 ...
##  $ Medidas_tempranas      : num  2.724 3.006 6.989 0.979 1.085 ...
##  $ estructural            : num  1.32 1.86 5.23 7.13 7.69 ...
##  - attr(*, "na.action")= 'omit' Named int [1:3] 39 107 112
##   ..- attr(*, "names")= chr [1:3] "39" "116" "121"

# Give the 22 regression columns readable labels. The labels are assigned by
# position, so their order must match the column order of data_regre.
names(data_regre) <- c(
  "Political stability",
  "Voice and accountability",
  "Rule of law",
  "Control Corruption",
  "Regulatory Quality",
  "Tasa de desempleo",
  "Densidad de la poblacion",
  "Ayuda economica",
  "IDH",
  "Expectativa de vida",
  "Expectativa de años de escolaridad",
  "Promedio de años de escolaridad",
  "Renta Nacional",
  "Poblacion urbana",
  "Campañas informativas",
  "Rigurosidad",
  "GEE",
  "PBI per capita",
  "Contagiados",
  "Gobernanza",
  "Medidas tempranas",
  "Estructural"
)

# First regression model for Contagiados.
# Predictors are written as column names (backticks for names containing
# spaces) so they are looked up through `data =` instead of being read
# from the global `data_regre` object. This keeps coefficient names clean and
# allows predict() to be used with new data.
MINARISE <- formula(
  Contagiados ~ Gobernanza + `Poblacion urbana` + `Renta Nacional` +
    `Expectativa de años de escolaridad`
)
MINARISEM <- lm(MINARISE, data = data_regre)
summary(MINARISEM)

## 
## Call:
## lm(formula = MINARISE, data = data_regre)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64222 -0.08302 -0.01803  0.05097  0.92788 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                      3.228e-01  9.223e-02   3.500
## data_regre$Gobernanza                           -2.280e-02  1.135e-02  -2.008
## data_regre$`Poblacion urbana`                    1.927e-03  1.112e-03   1.733
## data_regre$`Renta Nacional`                      1.391e-05  1.431e-06   9.720
## data_regre$`Expectativa de años de escolaridad` -3.354e-02  9.385e-03  -3.573
##                                                 Pr(>|t|)    
## (Intercept)                                     0.000654 ***
## data_regre$Gobernanza                           0.046849 *  
## data_regre$`Poblacion urbana`                   0.085667 .  
## data_regre$`Renta Nacional`                      < 2e-16 ***
## data_regre$`Expectativa de años de escolaridad` 0.000509 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1846 on 120 degrees of freedom
## Multiple R-squared:  0.5803, Adjusted R-squared:  0.5663 
## F-statistic: 41.48 on 4 and 120 DF,  p-value: < 2.2e-16

# Second regression model for Contagiados, built on the three factor scores
# plus urban population and population density.
# Predictors are written as column names (backticks for names containing
# spaces) so they are looked up through `data =`; the stray unary `+` that
# followed `~` has been removed.
efe <- formula(
  Contagiados ~ Gobernanza + `Medidas tempranas` + Estructural +
    `Poblacion urbana` + `Densidad de la poblacion`
)
afa <- lm(efe, data = data_regre)
summary(afa)

## 
## Call:
## lm(formula = efe, data = data_regre)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34773 -0.12268 -0.03390  0.06314  1.99284 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                           -1.873e-01  8.584e-02  -2.182   0.0311 *
## data_regre$Gobernanza                 -1.110e-02  1.651e-02  -0.673   0.5026  
## data_regre$`Medidas tempranas`        -3.638e-03  1.160e-02  -0.314   0.7544  
## data_regre$Estructural                 3.688e-02  1.979e-02   1.864   0.0648 .
## data_regre$`Poblacion urbana`          3.248e-03  1.472e-03   2.206   0.0293 *
## data_regre$`Densidad de la poblacion`  1.609e-04  9.276e-05   1.734   0.0854 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2456 on 119 degrees of freedom
## Multiple R-squared:  0.2632, Adjusted R-squared:  0.2322 
## F-statistic: 8.502 on 5 and 119 DF,  p-value: 6.596e-07

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

# Analysis-of-variance comparison of the two fitted models, printed as text.
# NOTE(review): MINARISEM and afa each contain predictors the other lacks,
# so they are not nested and the F test is not defined for this pair (the
# resulting table has a negative Sum of Sq and no F / p-value). Consider
# comparing them with AIC/BIC or adjusted R2 instead.
Anovita <- anova(MINARISEM, afa)
stargazer(Anovita, type = "text", summary = FALSE,
          title = "Table de Análisis de Varianza")

## 
## Table de Análisis de Varianza
## =====================================
##   Res.Df  RSS  Df Sum of Sq F Pr(> F)
## -------------------------------------
## 1  120   4.088                       
## 2  119   7.176 1   -3.088            
## -------------------------------------

# Side-by-side text table of both regression models.
stargazer(afa, MINARISEM, type = "text")

## 
## ===================================================================================
##                                                   Dependent variable:              
##                                      ----------------------------------------------
##                                                       Contagiados                  
##                                               (1)                     (2)          
## -----------------------------------------------------------------------------------
## Gobernanza                                   -0.011                -0.023**        
##                                             (0.017)                 (0.011)        
##                                                                                    
## `Medidas tempranas`                          -0.004                                
##                                             (0.012)                                
##                                                                                    
## Estructural                                  0.037*                                
##                                             (0.020)                                
##                                                                                    
## `Poblacion urbana`                          0.003**                 0.002*         
##                                             (0.001)                 (0.001)        
##                                                                                    
## `Densidad de la poblacion`                  0.0002*                                
##                                             (0.0001)                               
##                                                                                    
## `Renta Nacional`                                                  0.00001***       
##                                                                    (0.00000)       
##                                                                                    
## `Expectativa de años de escolaridad`                               -0.034***       
##                                                                     (0.009)        
##                                                                                    
## Constant                                    -0.187**               0.323***        
##                                             (0.086)                 (0.092)        
##                                                                                    
## -----------------------------------------------------------------------------------
## Observations                                  125                     125          
## R2                                           0.263                   0.580         
## Adjusted R2                                  0.232                   0.566         
## Residual Std. Error                     0.246 (df = 119)       0.185 (df = 120)    
## F Statistic                          8.502*** (df = 5; 119) 41.477*** (df = 4; 120)
## ===================================================================================
## Note:                                                   *p<0.1; **p<0.05; ***p<0.01

# Model selected for the diagnostic checks that follow.
el_elegido <- MINARISEM

MInarisa

library(ggpubr) #gráfico para ver normalidad

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:plyr':
## 
##     mutate

library(scatterplot3d)
library(stargazer)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# LINEARITY
# First diagnostic plot of the chosen model (author's note: diagonal,
# nearly linear).
plot(el_elegido, which = 1, main = "Gráfico 2: Linealidad")

B. Homocedasticidad.

# Third diagnostic plot of the chosen model (author's note: diagonal).
plot(el_elegido, which = 3, main = "Gráfico 3: Homocedasticidad")

# Breusch-Pagan test: a p-value above 0.05 would indicate homoscedasticity.
bptest(el_elegido)

## 
##  studentized Breusch-Pagan test
## 
## data:  el_elegido
## BP = 57.162, df = 4, p-value = 1.144e-11

c. Normalidad de residuos. Puntos cerca de la diagonal.

# Second diagnostic plot of the chosen model (author's note: the points
# move away from the diagonal).
plot(el_elegido, which = 2, main = "Gráfico 4: Normalidad de residuos")

# Shapiro-Wilk test on the residuals: a p-value below 0.05 indicates that the
# residuals are not normally distributed.
shapiro.test(el_elegido$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  el_elegido$residuals
## W = 0.83156, p-value = 1.211e-10

# Variance inflation factor of each predictor in the chosen model
# (multicollinearity check).
VIF(el_elegido)

##                           data_regre$Gobernanza 
##                                        2.876856 
##                   data_regre$`Poblacion urbana` 
##                                        2.135327 
##                     data_regre$`Renta Nacional` 
##                                        2.759840 
## data_regre$`Expectativa de años de escolaridad` 
##                                        2.745941

5.2 Ver valores influyentes. Prestar atención al índice de Cook.

# Fifth diagnostic plot of the chosen model, used to spot influential cases.
plot(el_elegido, which = 5,
     main = "Gráfico 5: Identificación de valores influyentes")

# Logical table marking, per observation, which influence measures flag it.
checkMINARISA <- as.data.frame(influence.measures(el_elegido)[["is.inf"]])

## Warning in abbreviate(vn): abbreviate used with non-ASCII chars

# Show the observations flagged by Cook's distance or by the hat values
# (author's note: 120, 124).
checkMINARISA[with(checkMINARISA, cook.d | hat), ]

#data_regre

R Notebook

Primera variable: PBI PER CAPITA por precio de dolar actual (2018)

estructural

agregandolo a la data

agrega