En otra notebook, corre la data*

library(readxl)
library(rio)
library(stringi)
library(htmltab)
library(jsonlite)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(readr)   
library(stringr)
library(tidyr)
library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year

library(DescTools)

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:data.table':
## 
##     %like%

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

intentemos de otra manera

library(readxl)

data_covid= "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

esperanzita=import(data_covid)

estoy segura que me arrepentire de esto, amen

esperanzita = esperanzita[,c(1,2,5:150)]

names(esperanzita) = c("PROVINCE","Country","A_1/22/20", "A_1/23/20",   "A_1/24/20",    "A_1/25/20",    "A_1/26/20",    "A_1/27/20", "A_1/28/20", "A_1/29/20",  "A_1/30/20",    "A_1/31/20",    "A_2/1/20", "A_2/2/20", "A_2/3/20", "A_2/4/20",     "A_2/5/20", "A_2/6/20"  ,   "A2/7/20"   ,   "A2/8/20",  "A_2/9/20", "A_2/10/20",    "A_2/11/20",    "A_2/12/20",    "A_2/13/20",    "A_2/14/20",    "A_2/15/20",    "A_2/16/20",    "A_2/17/20",    "A_2/18/20",    "A_2/19/20",    "A_2/20/20",    "A_2/21/20",    "A_2/22/20",    "A_2/23/20",    "A_2/24/20",    "A_2/25/20",    "A_2/26/20",    "A_2/27/20",    "A_2/28/20",    "A_2/29/20",    "A_3/1/20", "A_3/2/20", "A_3/3/20", "A_3/4/20", "A_3/5/20", "A_3/6/20", "A_3/7/20", "A_3/8/20", "A_3/9/20", "A_3/10/20",    "A_3/11/20",    "A_3/12/20",    "A_3/13/20",    "A_3/14/20",    "A_3/15/20",    "A_3/16/20",    "A_3/17/20",    "A_3/18/20",    "A_3/19/20","A_3/20/20", "A_3/21/20", "A_3/22/20", "A_3/23/20", "A_3/24/20", "A_3/25/20", "A_3/26/20", "A_3/27/20", "A_3/28/20", "A_3/29/20", "A_3/30/20", "A_3/31/20", "A_4/1/20", "A_4/2/20", "A_4/3/20", "A_4/4/20", "A_4/5/20", "A_4/6/20", "A_4/7/20", "A_4/8/20", "A_4/9/20", "A_4/10/20", "A_4/11/20", "A_4/12/20", "A_4/13/20", "A_4/14/20", "A_4/15/20", "A_4/16/20", "A_4/17/20", "A_4/18/20", "A_4/19/20", "A_4/20/20", "A_4/21/20", "A_4/22/20", "A_4/23/20", "A_4/24/20", "A_4/25/20", "A_4/26/20", "A_4/27/20", "A_4/28/20", "A_4/29/20", "A_4/30/20", "A_5/1/20", "A_5/2/20", "A_5/3/20", "A_5/4/20", "A_5/5/20", "A_5/6/20", "A_5/7/20", "A_5/8/20", "A_5/9/20", "A_5/10/20", "A_5/11/20", "A_5/12/20", "A_5/13/20", "A_5/14/20", "A_5/15/20", "A_5/16/20", "A_5/17/20", "A_5/18/20", "A_5/19/20", "A_5/20/20", "A_5/21/20", "A_5/22/20", "A_5/23/20", "A_5/24/20", "A_5/25/20", "A_5/26/20", "A_5/27/20", "A_5/28/20", "A_5/29/20", "A_5/30/20", "A_5/31/20", "A_6/1/20", "A_6/2/20", "A_6/3/20", "A_6/4/20", "A_6/5/20", "A_6/6/20", "A_6/7/20    ", "A_6/8/20", "A_6/9/20", "A_6/10/20", "A_6/11/20", "A_6/12/20", "A_6/13/20", "A_6/14/20", "A_6/15/20")

2020-06-11

str(esperanzita)

## 'data.frame':    266 obs. of  148 variables:
##  $ PROVINCE  : chr  "" "" "" "" ...
##  $ Country   : chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ A_1/22/20 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ A_1/23/20 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ A_1/24/20 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ A_1/25/20 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ A_1/26/20 : int  0 0 0 0 0 0 0 0 0 3 ...
##  $ A_1/27/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_1/28/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_1/29/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_1/30/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_1/31/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/1/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/2/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/3/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/4/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/5/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/6/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A2/7/20   : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A2/8/20   : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/9/20  : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/10/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/11/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/12/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/13/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/14/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/15/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/16/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/17/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/18/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/19/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/20/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/21/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/22/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/23/20 : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/24/20 : int  1 0 0 0 0 0 0 0 0 4 ...
##  $ A_2/25/20 : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ A_2/26/20 : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ A_2/27/20 : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ A_2/28/20 : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ A_2/29/20 : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ A_3/1/20  : int  1 0 1 0 0 0 0 1 0 6 ...
##  $ A_3/2/20  : int  1 0 3 1 0 0 0 1 0 6 ...
##  $ A_3/3/20  : int  1 0 5 1 0 0 1 1 0 13 ...
##  $ A_3/4/20  : int  1 0 12 1 0 0 1 1 0 22 ...
##  $ A_3/5/20  : int  1 0 12 1 0 0 1 1 0 22 ...
##  $ A_3/6/20  : int  1 0 17 1 0 0 2 1 0 26 ...
##  $ A_3/7/20  : int  1 0 17 1 0 0 8 1 0 28 ...
##  $ A_3/8/20  : int  4 0 19 1 0 0 12 1 0 38 ...
##  $ A_3/9/20  : int  4 2 20 1 0 0 12 1 0 48 ...
##  $ A_3/10/20 : int  5 10 20 1 0 0 17 1 0 55 ...
##  $ A_3/11/20 : int  7 12 20 1 0 0 19 1 0 65 ...
##  $ A_3/12/20 : int  7 23 24 1 0 0 19 4 0 65 ...
##  $ A_3/13/20 : int  7 33 26 1 0 1 31 8 1 92 ...
##  $ A_3/14/20 : int  11 38 37 1 0 1 34 18 1 112 ...
##  $ A_3/15/20 : int  16 42 48 1 0 1 45 26 1 134 ...
##  $ A_3/16/20 : int  21 51 54 2 0 1 56 52 2 171 ...
##  $ A_3/17/20 : int  22 55 60 39 0 1 68 78 2 210 ...
##  $ A_3/18/20 : int  22 59 74 39 0 1 79 84 3 267 ...
##  $ A_3/19/20 : int  22 64 87 53 0 1 97 115 4 307 ...
##  $ A_3/20/20 : int  24 70 90 75 1 1 128 136 6 353 ...
##  $ A_3/21/20 : int  24 76 139 88 2 1 158 160 9 436 ...
##  $ A_3/22/20 : int  40 89 201 113 2 1 266 194 19 669 ...
##  $ A_3/23/20 : int  40 104 230 133 3 3 301 235 32 669 ...
##  $ A_3/24/20 : int  74 123 264 164 3 3 387 249 39 818 ...
##  $ A_3/25/20 : int  84 146 302 188 3 3 387 265 39 1029 ...
##  $ A_3/26/20 : int  94 174 367 224 4 7 502 290 53 1219 ...
##  $ A_3/27/20 : int  110 186 409 267 4 7 589 329 62 1405 ...
##  $ A_3/28/20 : int  110 197 454 308 5 7 690 407 71 1617 ...
##  $ A_3/29/20 : int  120 212 511 334 7 7 745 424 77 1791 ...
##  $ A_3/30/20 : int  170 223 584 370 7 7 820 482 78 2032 ...
##  $ A_3/31/20 : int  174 243 716 376 7 7 1054 532 80 2032 ...
##  $ A_4/1/20  : int  237 259 847 390 8 7 1054 571 84 2182 ...
##  $ A_4/2/20  : int  273 277 986 428 8 9 1133 663 87 2298 ...
##  $ A_4/3/20  : int  281 304 1171 439 8 15 1265 736 91 2389 ...
##  $ A_4/4/20  : int  299 333 1251 466 10 15 1451 770 93 2493 ...
##  $ A_4/5/20  : int  349 361 1320 501 14 15 1451 822 96 2580 ...
##  $ A_4/6/20  : int  367 377 1423 525 16 15 1554 833 96 2637 ...
##  $ A_4/7/20  : int  423 383 1468 545 17 19 1628 853 96 2686 ...
##  $ A_4/8/20  : int  444 400 1572 564 19 19 1715 881 99 2734 ...
##  $ A_4/9/20  : int  484 409 1666 583 19 19 1795 921 100 2773 ...
##  $ A_4/10/20 : int  521 416 1761 601 19 19 1975 937 103 2822 ...
##  $ A_4/11/20 : int  555 433 1825 601 19 21 1975 967 103 2857 ...
##  $ A_4/12/20 : int  607 446 1914 638 19 21 2142 1013 103 2857 ...
##  $ A_4/13/20 : int  665 467 1983 646 19 23 2208 1039 102 2863 ...
##  $ A_4/14/20 : int  714 475 2070 659 19 23 2277 1067 103 2870 ...
##  $ A_4/15/20 : int  784 494 2160 673 19 23 2443 1111 103 2886 ...
##  $ A_4/16/20 : int  840 518 2268 673 19 23 2571 1159 103 2897 ...
##  $ A_4/17/20 : int  906 539 2418 696 19 23 2669 1201 103 2926 ...
##  $ A_4/18/20 : int  933 548 2534 704 24 23 2758 1248 103 2936 ...
##  $ A_4/19/20 : int  996 562 2629 713 24 23 2839 1291 103 2957 ...
##  $ A_4/20/20 : int  1026 584 2718 717 24 23 2941 1339 104 2963 ...
##  $ A_4/21/20 : int  1092 609 2811 717 24 23 3031 1401 104 2969 ...
##  $ A_4/22/20 : int  1176 634 2910 723 25 24 3144 1473 104 2971 ...
##  $ A_4/23/20 : int  1279 663 3007 723 25 24 3435 1523 104 2976 ...
##  $ A_4/24/20 : int  1351 678 3127 731 25 24 3607 1596 105 2982 ...
##  $ A_4/25/20 : int  1463 712 3256 738 25 24 3780 1677 106 2994 ...
##  $ A_4/26/20 : int  1531 726 3382 738 26 24 3892 1746 106 3002 ...
##  $ A_4/27/20 : int  1703 736 3517 743 27 24 4003 1808 106 3004 ...
##   [list output truncated]

esperanzita[] <- lapply(esperanzita, as.character)
esperanzita <- data.frame(lapply(esperanzita, function(x) ifelse(!is.na(as.numeric(x)), as.numeric(x),  x)))

## Warning in ifelse(!is.na(as.numeric(x)), as.numeric(x), x): NAs introduced by
## coercion

## Warning in ifelse(!is.na(as.numeric(x)), as.numeric(x), x): NAs introduced by
## coercion

esperanzita[esperanzita == 0] <- NA

library(dplyr)
library(tidyr)


esperanzita = esperanzita %>% 
  gather(A_, val, 3:148) %>% 
  filter(!is.na(val)) %>% 
  group_by(Country) %>% 
  summarise(uno= first(val),
            dos = nth(val, 2),
             tres = nth(val, 3),
             cuatro = nth(val, 4),
             cinco = nth(val, 5),
             seis = nth(val, 6),
             siete = nth(val, 7),
             ocho = nth(val, 8),
             nueve = nth(val, 9),
             diez = nth(val, 10),
             once = nth(val, 11),
             doce = nth(val, 12),
             trece = nth(val, 13),
             catorce = nth(val, 14),
             quince = nth(val, 15),
             dieciseis = nth(val, 16),
             diecisite = nth(val, 17),
             dieciocho = nth(val, 18),
             diecinueve = nth(val, 19),
             veinte = nth(val, 20),
             veintiuno = nth(val, 21),
             veintidos = nth(val, 22),
             veintitres = nth(val, 23),
             veinticuatro = nth(val, 24),
             veinticinco = nth(val, 25),
             veintiseis = nth(val, 26),
             veintisiete = nth(val, 27),
             veintocho = nth(val, 28),
             veintinueve = nth(val, 29),
             treinta = nth(val, 30))%>% 
  left_join(esperanzita, ., by = "Country")

## `summarise()` ungrouping output (override with `.groups` argument)

esperanzita = esperanzita[,-c(3:148)]

esperanzita=aggregate(cbind(uno,dos,tres,cuatro,cinco,seis,siete,ocho,nueve,diez,once,doce,trece,catorce,quince,dieciseis,diecisite,dieciocho,diecinueve, veinte, veintiuno,veintidos,veintitres,veinticuatro,veinticinco,veintiseis,veintisiete,veintocho,veintinueve,treinta) ~ Country,data = esperanzita,sum)

#para añadir el nombre

hoja="https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/codigo.csv" 
oto=import(hoja)
esperanzita=merge(oto,esperanzita, by.x = 'Country', by.y='Country') #¿se podra recuperar más data de países?
names(esperanzita)

##  [1] "Country"      "CODE"         "uno"          "dos"          "tres"        
##  [6] "cuatro"       "cinco"        "seis"         "siete"        "ocho"        
## [11] "nueve"        "diez"         "once"         "doce"         "trece"       
## [16] "catorce"      "quince"       "dieciseis"    "diecisite"    "dieciocho"   
## [21] "diecinueve"   "veinte"       "veintiuno"    "veintidos"    "veintitres"  
## [26] "veinticuatro" "veinticinco"  "veintiseis"   "veintisiete"  "veintocho"   
## [31] "veintinueve"  "treinta"

esperanzita$Country = NULL

linkedin = "https://github.com/AriannaNKZC/Estad-2/raw/master/%C2%BFSera%20la%20data%3F.xls"
poblacion = import(linkedin)
poblacion = poblacion[,c(1,2,64)]
str(poblacion)

## 'data.frame':    264 obs. of  3 variables:
##  $ Country Name: chr  "Aruba" "Afganistán" "Angola" "Albania" ...
##  $ Country Code: chr  "ABW" "AFG" "AGO" "ALB" ...
##  $ 2019        : num  106314 38041754 31825295 2854191 77142 ...

names(poblacion)= c("Country", "CODE", "pobla")
esperanzita=merge(poblacion,esperanzita, by.x = 'CODE', by.y='CODE')

#pobla esta en miles

esperanzita$pobla = (esperanzita$pobla)

#comenzo el martirio

esperanzita$uno = (esperanzita$uno/esperanzita$pobla)*100
esperanzita$dos = (esperanzita$dos/esperanzita$pobla)*100
esperanzita$tres = (esperanzita$tres/esperanzita$pobla)*100
esperanzita$cuatro = (esperanzita$cuatro/esperanzita$pobla)*100
esperanzita$cinco = (esperanzita$cinco/esperanzita$pobla)*100
esperanzita$seis = (esperanzita$seis/esperanzita$pobla)*100
esperanzita$siete = (esperanzita$siete/esperanzita$pobla)*100
esperanzita$ocho = (esperanzita$ocho/esperanzita$pobla)*100
esperanzita$nueve = (esperanzita$nueve/esperanzita$pobla)*100
esperanzita$diez = (esperanzita$diez/esperanzita$pobla)*100

esperanzita$once = (esperanzita$once/esperanzita$pobla)*100
esperanzita$doce = (esperanzita$doce/esperanzita$pobla)*100
esperanzita$trece = (esperanzita$trece/esperanzita$pobla)*100
esperanzita$catorce= (esperanzita$catorce/esperanzita$pobla)*100
esperanzita$quince = (esperanzita$quince/esperanzita$pobla)*100
esperanzita$dieciseis = (esperanzita$dieciseis/esperanzita$pobla)*100
esperanzita$diecisite = (esperanzita$diecisite/esperanzita$pobla)*100
esperanzita$dieciocho = (esperanzita$dieciocho/esperanzita$pobla)*100
esperanzita$diecinueve = (esperanzita$diecinueve/esperanzita$pobla)*100
esperanzita$veinte = (esperanzita$veinte/esperanzita$pobla)*100

esperanzita$veintiuno = (esperanzita$veintiuno/esperanzita$pobla)*100
esperanzita$veintidos = (esperanzita$veintidos/esperanzita$pobla)*100
esperanzita$veintitres = (esperanzita$veintitres/esperanzita$pobla)*100
esperanzita$veinticuatro = (esperanzita$veinticuatro/esperanzita$pobla)*100
esperanzita$veinticinco = (esperanzita$veinticinco/esperanzita$pobla)*100
esperanzita$veintiseis = (esperanzita$veintiseis/esperanzita$pobla)*100
esperanzita$veintisiete = (esperanzita$veintisiete/esperanzita$pobla)*100
esperanzita$veintocho = (esperanzita$veintocho/esperanzita$pobla)*100
esperanzita$veintinueve = (esperanzita$veintinueve/esperanzita$pobla)*100
esperanzita$treinta = (esperanzita$treinta/esperanzita$pobla)*100

#la suma y division

esperanzita$promedio = rowSums(esperanzita[ , 4:33])
esperanzita$promedio = (esperanzita$promedio/30)

esperanzita = esperanzita[,c(1,2,3,34)]

libraries

library(rio)
library(stringi)
library(htmltab)
library(jsonlite)
library(lubridate)
library(readr)   
library(stringr)
library(tidyr)
library(data.table)
library(DescTools)
library(readxl)

Trabajando para limpiar las dos datas

Primera variable independiente: el porcentaje de gasto de salud por PBI (2017)

Trayendo la base de datos

data_salud <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
gasto_salud=import(data_salud)

Eliminando las filas y columnas no significativas

names(gasto_salud)=(gasto_salud[1,])
gasto_salud = gasto_salud[-1,]
gasto_salud = gasto_salud[,c(1,2, 62)]

inspeccionando las variables

str(gasto_salud$`2017`)

##  num [1:264] NA 11.78 2.79 NA 10.32 ...

names(gasto_salud) = c("PAIS", "CODE","GS_2017")
summary(gasto_salud)

##      PAIS               CODE              GS_2017      
##  Length:264         Length:264         Min.   : 1.181  
##  Class :character   Class :character   1st Qu.: 4.534  
##  Mode  :character   Mode  :character   Median : 6.342  
##                                        Mean   : 6.633  
##                                        3rd Qu.: 8.196  
##                                        Max.   :17.143  
##                                        NA's   :79

#quitando las tildes
gasto_salud$PAIS =stri_trans_general(gasto_salud$PAIS,"Latin-ASCII")

Segunda variable: PBI PER CAPITA por precio de dolar actual (2018)

Extrayendo la base de datos

library(readxl)
data_ppp <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_NY.GDP.PCAP.CD_DS2_es_csv_v2_1347337.csv"
ppp_pib =import(data_ppp)

Eliminación de columnas y el cambio de nombre

names(ppp_pib)=(ppp_pib[1,])
ppp_pib = ppp_pib[-1,]
ppp_pib = ppp_pib[,c(2,63)]
names(ppp_pib) = c("CODE", "PPP_2018")

Base con listado de países (sin agrupación de continentes como el del Banco Mundial)

linkfechas="https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/BASE_FECHA_INICIO.xlsx"
datafechas=import(linkfechas)
datafechas = datafechas[,c(1,2)]
names(datafechas) = c("COUNTRY","CODE")

Merge

Tabla_Final = merge(gasto_salud,ppp_pib,by.x='CODE', by.y='CODE')

link1="https://github.com/CarlosGDiez/BasesLimpias/raw/master/Gee_sucio.csv"
data1=import(link1) #esto puede ser demasiado pesado para correrlo como Chunk... como linea individual no tiene problema.
dim(data1)

## [1] 433   5

link2="https://github.com/CarlosGDiez/BasesLimpias/blob/master/Rigurosidad.csv?raw=true" #esto puede ser demasiado pesado para correrlo como Chunk... como linea individual no tiene problema.
data2=import(link2)
dim(data2)

## [1] 64829    44

#GEE

library(dplyr)
#Renombrar variables
names(data1)[1]="Country"
names(data1)[2]="CODE"
names(data1)[3]="Series"
#Filtrar para tomar valor GEE y no el error estandar
Prueba1=data1%>%
  group_by(Country)%>%
  mutate(Index = ifelse(Series==nth(Series,1), 1, 0))%>%
  filter(Index==1)
#eliminamos filas vacías
Prueba1=Prueba1[-c(215,216,217,218,219),]
names(Prueba1)[5]="Indice"
  Prueba1$Indice=parse_number(Prueba1$Indice)

## Warning: 5 parsing failures.
## row col expected actual
##  46  -- a number     ..
## 129  -- a number     ..
## 139  -- a number     ..
## 144  -- a number     ..
## 164  -- a number     ..

  #eliminamos filas sin valores
Prueba1=Prueba1[-c(46,129,139,144,164),]
str(Prueba1$Indice)

##  num [1:209] -1.457 0.115 -0.444 0.551 1.945 ...
##  - attr(*, "problems")= tibble [5 × 4] (S3: tbl_df/tbl/data.frame)
##   ..$ row     : int [1:5] 46 129 139 144 164
##   ..$ col     : int [1:5] NA NA NA NA NA
##   ..$ expected: chr [1:5] "a number" "a number" "a number" "a number" ...
##   ..$ actual  : chr [1:5] ".." ".." ".." ".." ...

Prueba1$Indice=as.numeric(Prueba1$Indice)
str(Prueba1$Indice)

##  num [1:209] -1.457 0.115 -0.444 0.551 1.945 ...

Limpieza adicional. Es útil reducir todo solo a código de país e indice,

Prueba1$Country=NULL
Prueba1$Series=NULL
Prueba1$Index=NULL
Prueba1$`Series Code`=NULL
Prueba1$std=NULL
#no parece haber diferencias notables

indice de rigurosidad

Limpieza

data3=data2 #copiamos data para tenerla a salvo de cambios
#cambiamos nombres
names(data3)[2]="CODE"
data3[6:34]=NULL
data3[7:15]=NULL
#dejamos country porque la necesitaremos más adelante
data3$RegionCode=NULL
data3$RegionName=NULL #estamos tomando medidas a nivel de pais, no local
data3$Date <- ymd(data3$Date)

Seleccionar para que tome solo rigurosidad en el séptimo día.

Variable dependiente

#la primera parte de esto provienede los datos de Jose Incio.

confirmed <- "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

WorldData<-import(file = confirmed)%>%
  mutate(type="datacon")%>% 
  tidyr::gather(Fecha,Valor,-c(type,"Province/State",
                        "Country/Region",Lat,Long))   #juntando fechas distintas en una sola.

Prueba2= WorldData%>%
  filter(Valor>0)
names(Prueba2)[2]="Country"

str(Prueba2$Country)

##  chr [1:59475] "China" "China" "China" "China" "China" "China" "China" ...

Prueba2$Country=as.factor(Prueba2$Country)
str(Prueba2$Fecha)

##  chr [1:59475] "1/22/20" "1/22/20" "1/22/20" "1/22/20" "1/22/20" "1/22/20" ...

Prueba2$Fecha=mdy(Prueba2$Fecha)
Prueba2$Fecha=as.Date(Prueba2$Fecha)

#juntando provincias en paises
Prueba2=aggregate(Valor
                  ~ Country + Fecha, 
          data = Prueba2,    
          sum)     
#hay un problema tiene paises requerimos códigos, un simple merge de paises y codigos para alinearlo
data4=data3
data4$Date=NULL
data4$GovernmentResponseIndex=NULL
data4$RegionName=NULL
data4$StringencyIndex=NULL
names(data4)[1]="Country"
#eliminamos paises repetidos. #advertencia algunas veces esta parte es lenta en leer, o lo era nates,a hora corre rapido
data4=data4[!duplicated(data4$Country),]
#Perfecto ahora tenemos una base que solo tien paises (key) y codigos
#aplciaremos Merge más adelante

##calculos de dias

#calcular dia 30
Prueba100=Prueba2%>%
group_by(Country)%>%
mutate(dia100= ifelse(Fecha==nth(Fecha,100),1,0))%>%
filter(dia100==1)
Prueba100=merge(Prueba100,data4, by.x="Country", by.y="Country")
Prueba100=Prueba100[,-4]
names(Prueba100)[4]="Code"
#Habiendo hecho eso procedemos a repetirlo pero con el dia 7 que es ek qu eutikizarmeos para rigruosidad
Prueba7=Prueba2%>%
  group_by(Country)%>%
  mutate(dia7 = ifelse(Fecha==nth(Fecha,7), 1, 0))%>%
  filter(dia7==1)
table(Prueba7$Fecha)

## 
## 2020-01-28 2020-01-29 2020-01-30 2020-01-31 2020-02-01 2020-02-02 2020-02-04 
##          6          2          1          2          2          3          2 
## 2020-02-05 2020-02-06 2020-02-07 2020-02-10 2020-02-13 2020-02-20 2020-02-25 
##          2          3          2          1          1          1          1 
## 2020-02-27 2020-02-29 2020-03-01 2020-03-02 2020-03-03 2020-03-04 2020-03-05 
##          2          1          5          5          6          4          6 
## 2020-03-06 2020-03-07 2020-03-08 2020-03-09 2020-03-10 2020-03-11 2020-03-12 
##          4          5          7          3          4          4          9 
## 2020-03-13 2020-03-14 2020-03-15 2020-03-16 2020-03-17 2020-03-18 2020-03-19 
##          1          5          3          3          6          2          7 
## 2020-03-20 2020-03-21 2020-03-22 2020-03-23 2020-03-24 2020-03-25 2020-03-26 
##         14          4          5          3          4          4          7 
## 2020-03-27 2020-03-28 2020-03-29 2020-03-30 2020-03-31 2020-04-02 2020-04-03 
##          2          5          1          2          3          1          1 
## 2020-04-05 2020-04-06 2020-04-08 2020-04-11 2020-04-12 2020-04-16 2020-05-06 
##          1          2          1          2          1          1          2 
## 2020-05-19 
##          1

dia7=merge(Prueba7,data4, by.x="Country", by.y="Country")
#ahora podemos eliminar country en la data original
data3[1]=NULL

Ahora creamos varaibles mergeables

dia7$DIA7=paste(dia7$CODE,dia7$Fecha)
dia7$Country=NULL
dia7$dia7=NULL
dia7$CODE=NULL 
table(dia7$DIA7)

## 
## AFG 2020-03-01 AGO 2020-03-26 ALB 2020-03-15 AND 2020-03-08 ARE 2020-02-04 
##              1              1              1              1              1 
## ARG 2020-03-09 AUS 2020-02-01 AUT 2020-03-02 AZE 2020-03-07 BDI 2020-04-06 
##              1              1              1              1              1 
## BEL 2020-02-10 BEN 2020-03-22 BFA 2020-03-16 BGD 2020-03-14 BGR 2020-03-14 
##              1              1              1              1              1 
## BHR 2020-03-01 BIH 2020-03-11 BLR 2020-03-05 BLZ 2020-03-29 BOL 2020-03-17 
##              1              1              1              1              1 
## BRA 2020-03-03 BRB 2020-03-23 BRN 2020-03-15 BTN 2020-03-12 BWA 2020-04-05 
##              1              1              1              1              1 
## CAF 2020-03-21 CAN 2020-02-01 CHE 2020-03-02 CHL 2020-02-29 CHN 2020-01-28 
##              1              1              1              1              1 
## CIV 2020-03-17 CMR 2020-03-12 COL 2020-03-12 CRI 2020-03-12 CUB 2020-03-18 
##              1              1              1              1              1 
## CYP 2020-03-15 DEU 2020-02-02 DJI 2020-03-24 DMA 2020-03-28 DNK 2020-03-04 
##              1              1              1              1              1 
## DOM 2020-03-07 DZA 2020-03-02 ECU 2020-03-07 EGY 2020-02-20 ERI 2020-03-27 
##              1              1              1              1              1 
## ESP 2020-02-07 EST 2020-03-04 ETH 2020-03-19 FIN 2020-02-04 FJI 2020-03-25 
##              1              1              1              1              1 
## FRA 2020-01-30 GAB 2020-03-20 GBR 2020-02-06 GEO 2020-03-03 GHA 2020-03-20 
##              1              1              1              1              1 
## GIN 2020-03-19 GMB 2020-03-23 GRC 2020-03-03 GTM 2020-03-20 GUY 2020-03-18 
##              1              1              1              1              1 
## HND 2020-03-17 HRV 2020-03-02 HTI 2020-03-26 HUN 2020-03-10 IDN 2020-03-08 
##              1              1              1              1              1 
## IND 2020-02-05 IRL 2020-03-06 IRN 2020-02-25 IRQ 2020-03-01 ISL 2020-03-05 
##              1              1              1              1              1 
## ISR 2020-02-27 ITA 2020-02-06 JAM 2020-03-17 JOR 2020-03-09 JPN 2020-01-28 
##              1              1              1              1              1 
## KAZ 2020-03-19 KEN 2020-03-19 KHM 2020-02-02 KWT 2020-03-01 LAO 2020-03-30 
##              1              1              1              1              1 
## LBN 2020-02-27 LBR 2020-03-22 LBY 2020-03-30 LKA 2020-02-02 LSO 2020-05-19 
##              1              1              1              1              1 
## LTU 2020-03-05 LUX 2020-03-06 LVA 2020-03-08 MAR 2020-03-08 MDA 2020-03-14 
##              1              1              1              1              1 
## MDG 2020-03-26 MEX 2020-03-05 MLI 2020-03-31 MNG 2020-03-16 MOZ 2020-03-28 
##              1              1              1              1              1 
## MRT 2020-03-20 MUS 2020-03-24 MWI 2020-04-08 MYS 2020-01-31 NAM 2020-03-20 
##              1              1              1              1              1 
## NER 2020-03-26 NGA 2020-03-05 NIC 2020-03-25 NLD 2020-03-04 NOR 2020-03-03 
##              1              1              1              1              1 
## NPL 2020-01-31 NZL 2020-03-05 OMN 2020-03-01 PAK 2020-03-02 PAN 2020-03-16 
##              1              1              1              1              1 
## PER 2020-03-12 PHL 2020-02-05 PNG 2020-03-26 POL 2020-03-10 PRT 2020-03-08 
##              1              1              1              1              1 
## PRY 2020-03-14 QAT 2020-03-06 RKS 2020-03-20 ROU 2020-03-03 RUS 2020-02-06 
##              1              1              1              1              1 
## RWA 2020-03-20 SAU 2020-03-08 SDN 2020-03-19 SEN 2020-03-08 SGP 2020-01-29 
##              1              1              1              1              1 
## SLE 2020-04-06 SLV 2020-03-25 SMR 2020-03-04 SOM 2020-03-22 SRB 2020-03-12 
##              1              1              1              1              1 
## SSD 2020-04-11 SUR 2020-03-20 SVN 2020-03-11 SWE 2020-02-07 SWZ 2020-03-20 
##              1              1              1              1              1 
## SYC 2020-03-20 SYR 2020-03-28 TCD 2020-03-25 TGO 2020-03-12 THA 2020-01-28 
##              1              1              1              1              1 
## TJK 2020-05-06 TLS 2020-03-28 TTO 2020-03-20 TUN 2020-03-10 TUR 2020-03-17 
##              1              1              1              1              1 
## TZA 2020-03-22 UGA 2020-03-27 UKR 2020-03-09 URY 2020-03-19 UZB 2020-03-21 
##              1              1              1              1              1 
## VEN 2020-03-20 VNM 2020-01-29 YEM 2020-04-16 ZAF 2020-03-11 ZMB 2020-03-24 
##              1              1              1              1              1 
## ZWE 2020-03-26 
##              1

data3$DIA7=paste(data3$CODE,data3$Date)
data3$Date=NULL
head(data3)

ResGob=merge(data3,dia7, by.x="DIA7", by.y = "DIA7")
#eliminamos valores ya no necesario como el mismo DIA 7 y es tan frustrante eliminarlo despues de tanto esfuerzo.
ResGob$DIA7=NULL
#El valor ya no es necesario es parte de la variable dependiente no de  esta independeinte
ResGob$Valor=NULL
str(ResGob$StringencyIndex)

##  num [1:160] 27.78 33.33 81.48 0 2.78 ...

dataFINAL=merge(ResGob,Prueba1, by.x="CODE",by.y = "CODE")

#infolaw

infocamp = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/public-campaigns-covid.csv"
dataic=import(infocamp)
str(dataic$Date)

##  IDate[1:46886], format: "2020-01-01" "2020-01-02" "2020-01-03" "2020-01-04" "2020-01-05" ...

names(dataic)[1]= "Country"
names(dataic)[3]= "Fecha"
dataic$DIA7=paste(dataic$Code, dataic$Fecha)

c7=merge(dataic, dia7, by.x="DIA7", by.y="DIA7") 
c7=c7[,-c(1,4,7)]
names(c7)=c("Country", "Code", "infoalawk", "Fecha")

#Población Urbana: Evidencia el porcentaje de la población urbana de un país

xurb = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/API_SP.URB.TOTL.IN.ZS_DS2_es_csv_v2_1347951.csv"
dataxurb=import(xurb)
names(dataxurb)=(dataxurb[1,])

dataxurb[,3:62]= NULL
dataxurb[,4:5]= NULL

names(dataxurb)[3]= "%poburb18"
dataxurb$'%poburb18'=round(dataxurb$'%poburb18', digits = 2)

dataxurb=dataxurb[c(-1,-61,-62,-63,-64,-65,-68,-73,-74,-95,-98,-102,-103,-104,-105,-107, -110,-128,-134,-135,-136,-139,-140,-142,-153,-156,-161,-170,-181,-191,-197,-198,-204,-215,-217,-218,-230,-231,-236,-238,-240,-241,-249),]
dataxurb$num=c(1:222)
rownames(dataxurb)=dataxurb[,4]
dataxurb[,4]= NULL

names(dataxurb)[2]= "Code"
names(dataxurb)[1]= "Country"

Capacidad Estatal

#Índice de Desarrollo Humano (Human Development Index): Indicador que integra las variables PBI, Educación y Esperanza de vida

LIDH="https://github.com/CarlaMendozaE/Prueba/blob/master/IDH.xlsx?raw=true"
IDH=import(LIDH)
IDH[,c(1,8,9)]=NULL
names(IDH)[2]= "HDI"
names(IDH)[3]= "EXPECTATIVAVIDA"
names(IDH)[4]= "EXPECTCOLE"
names(IDH)[5]= "YEARS_SCHOOLING"
names(IDH)[6]= "GNI_GROSSNATIONALINCOME"
IDH[,-1]=lapply(IDH[,-1], as.numeric)

## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion

str(IDH)

## 'data.frame':    222 obs. of  6 variables:
##  $ Country                : chr  "Norway" "Switzerland" "Ireland" "Germany" ...
##  $ HDI                    : num  0.954 0.946 0.942 0.939 0.939 ...
##  $ EXPECTATIVAVIDA        : num  82.3 83.6 82.1 81.2 84.7 ...
##  $ EXPECTCOLE             : num  18.1 16.2 18.8 17.1 16.5 ...
##  $ YEARS_SCHOOLING        : num  12.6 13.4 12.5 14.1 12 ...
##  $ GNI_GROSSNATIONALINCOME: num  68059 59375 55660 46946 60221 ...

IDH$HDI= as.numeric(IDH$HDI)
IDH$HDI=round(IDH$HDI, digits = 4)
#Eliminamos filas zzz
IDH=IDH[c(-63,-118,-156,-193:-222),]

#Mergeamos solo con los países que nos interesan 
IDH=merge(IDH,c7,by.x='Country', by.y='Country') 
IDH=merge(IDH,Prueba100,by.x='Country', by.y='Country') 
#Limpiamos
IDH=IDH[,-c(11:12)]
names(IDH)[7]="Code"
names(IDH)[9]="d7"
names(IDH)[10]="d100"

Sin embargo, nos interesa tenerla toda junta en un solo data frame. Así que mergeamos.

Carla=merge(IDH, dataxurb, by.x = "Code", by.y = "Code")
#Carla=merge(Carla, dataxrural, by.x = "Code", by.y = "Code")

#LIMPIA
str(Carla)

## 'data.frame':    140 obs. of  12 variables:
##  $ Code                   : chr  "AFG" "AGO" "ALB" "AND" ...
##  $ Country.x              : chr  "Afghanistan" "Angola" "Albania" "Andorra" ...
##  $ HDI                    : num  0.496 0.575 0.791 0.857 0.866 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 78.5 81.8 77.8 ...
##  $ EXPECTCOLE             : num  10.1 11.8 15.2 13.3 13.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.05 10.16 10.95 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 12300 48641 66912 ...
##  $ infoalawk              : int  2 1 2 0 0 2 2 2 2 1 ...
##  $ d7                     : Date, format: "2020-03-01" "2020-03-26" ...
##  $ d100                   : Date, format: "2020-06-02" "2020-06-27" ...
##  $ Country.y              : chr  "Afganistán" "Angola" "Albania" "Andorra" ...
##  $ %poburb18              : num  25.5 65.5 60.3 88.1 86.5 ...

Carla=Carla[,-11]
names(Carla)[2]="Country"

####AYUDA ECONOMICA LIMPIEZA INICIAL

#EXTRAEMOS LA DATA
library(rio)
linkayuda="https://raw.githubusercontent.com/CarlosGDiez/BasesLimpias/master/Rigurosidad.csv"
dataayuda=import(linkayuda)
#ELIMINACION DE COLUMNAS NO NECESARIAS
dataayuda = dataayuda[,c(1:5, 21)]
#ELIMINAMOS LAS REGIONES (SOLO NOS INTERESAN LOS PAISES)
#USA
dataayuda <- dataayuda[-c(48601 :62640), ]
#UK
dataayuda <- dataayuda[-c(16741 :17820), ]
#ELIMINAMOS LAS COLUMNAS DE REGION
dataayuda <- dataayuda[,-c(3, 4) ]
#SIMPLIFICAMOS LOS NOMBRES
names(dataayuda) = c("pais", "code", "fecha", "apoyo")
#TRANSFORMAMOS LA COLUMNA 3 EN FECHAS
dataayuda[ , 3 ] <- ymd(dataayuda[, 3])

BASE DIA 1

#CASOS CONFIRMADOS DE CONTAGIOS
confirmed <- "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

dataconfirmados<-import(file = confirmed)%>%
  mutate(type="datacon")%>% 
  tidyr::gather(Fecha,Valor,-c(type,"Province/State",
                        "Country/Region",Lat,Long))

dataconfirmados[ , 6] <- mdy(dataconfirmados[, 6])
prueba4= dataconfirmados%>%
  filter(Valor>0)
#JUNTAMOS LAS PROVINCIAS A UN SOLO PAIS
names(prueba4)[2]="pais"
names(prueba4)[6]="fecha"
prueba4=aggregate(Valor # dependientes
          ~ pais + fecha, # nivel
          data = prueba4,    # data
          sum)       # operacion
#ALINEAMOS CODIGOS Y PAISES (DATA SOLO DE PAISES Y CODIGOS)
datacode=dataayuda
datacode$fecha=NULL
datacode$apoyo=NULL
#ELIMINAMOS PAISES REPETIDOS
datacode=datacode[!duplicated(datacode$pais),]
#DIA 7 
dia7=prueba4%>%
  group_by(pais)%>%
  mutate(dia7 = ifelse(fecha==nth(fecha,7), 1, 0))%>%
  filter(dia7==1)
str(dia7$fecha)

##  Date[1:188], format: "2020-01-28" "2020-01-28" "2020-01-28" "2020-01-28" "2020-01-28" ...

dia7$fecha=as.Date(dia7$fecha)
#DIA 7 Y MERGE CON CODIGOS   
dia7_final=merge(dia7,datacode, by.x="pais", by.y="pais")

JUNTAMOS LOS DIAS CON LOS DATOS PARA CADA DIA

#CREACION DEL ID Y FECHA DEL DIA 7
dia7_final$DIA7=paste(dia7_final$code,dia7_final$fecha)
dia7_final$pais=NULL
dia7_final$dia7=NULL
dia7_final$code=NULL
dataayuda$DIA7=paste(dataayuda$code,dataayuda$fecha)
dataayuda$fecha=NULL
head(dataayuda)

#AGREGAMOS LOS DATOS DE APOYO ECONOMICO
APOYOECO=merge(dia7_final,dataayuda, by.x="DIA7", by.y = "DIA7")
APOYOECO$DIA7=NULL
APOYOECO$Valor=NULL

####DENSIDAD DE LA POBLACION EXTRACCION Y LIMPIEZA INICIAL

#EXTRAEMOS LA DATA

linkdensidad="https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/Excel%20densidad.xlsx.xls"
datadensidad=import(linkdensidad)

## New names:
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...

#ELIMINAMOS FILAS INNCESESARIAS INICIALES
datadensidad <- datadensidad[-c(1, 2), ]
#LA PRIMERA FILA SE VUELVE HEADLINE
names(datadensidad) <- as.matrix(datadensidad[1, ])
datadensidad <- datadensidad[-1, ]
datadensidad[] <- lapply(datadensidad, function(x) type.convert(as.character(x)))
#ELIMINAMOS LAS COLUMNAS INNECESARIAS
datadensidad = datadensidad[,c(1, 2, 63)]
#SIMPLIFICAMOS LOS NOMBRES DE LAS COLUMNAS
names(datadensidad) = c("pais", "code", "2018")

LIMPIEZA MÁS PROFUNDA

#ORDENAMOS LA DATA ALFABETICAMENTE
prueba3 <- datadensidad[order(datadensidad$pais),]
rownames(prueba3)<-c(1:264)
#ELIMINAMOS LAS FILAS INNECESARIAS
prueba3 <- prueba3[-c(8, 9, 38, 40, 41, 61:64, 73:77, 81, 99, 100, 104, 106:109, 130:133, 135, 142:144, 158:161, 178, 182, 183, 185, 186, 196, 197, 215, 219, 220, 228:230, 253, 261), ]
#NOMBRE FINAL
datadensidadfinal<-prueba3

####TASA DE DESEMPLEO

#EXTRAEMOS LA DATA
datadesempleo <- "https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/datadesempleooriginal.csv"
datadesempleo=import(datadesempleo)
#SIMPLIFICAMOS LOS NOMBRES
names(datadesempleo) = c("pais", "tasa")
#ORDENAMOS LA DATA ALFABETICAMENTE
datadesempleo <- datadesempleo[order(datadesempleo$pais),]
rownames(datadesempleo)<-c(1:187)

DATA PAISES (PARA INCLUIR LOS CODIGOS DE LOS PAISES)

data_salud <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
gasto_salud=import(data_salud)
gasto_salud = gasto_salud[,c(1, 2)]
gasto_salud = gasto_salud[-c(1),]
names(gasto_salud) = c("pais", "code")
#MERGE (PARA AGREGAR LOS CODIGOS)
prueba1=merge(datadesempleo,gasto_salud,all.x=T,all.y=T)

PRUEBA 1, LIMPIEZA DEL MERGE

#ELIMINAMOS LAS FILAS INNCESARIAS
prueba1 = prueba1[-c(1, 3:5, 8:11, 21:25, 28, 43, 86, 93, 94, 99:102, 108, 131: 134, 190, 192, 191, 198, 206, 212:215, 217:220, 228, 233, 234, 237, 246, 251, 252, 266, 267, 287, 288, 295, 308),]
#CAMBIAMOS NOMBRES
prueba1$pais =   gsub("Arabia Saudita", "Arabia Saudí", prueba1$pais)
prueba1$pais =   gsub("Bahráin", "Bahrein", prueba1$pais)
prueba1$pais =   gsub("Belarús", "Bielorrusia", prueba1$pais)
prueba1$pais =   gsub("Benín", "Benin", prueba1$pais)
prueba1$pais =   gsub("Birmania; Myanmar", "Birri", prueba1$pais)
prueba1$pais =   gsub("Birri", "Birmania", prueba1$pais)
prueba1$pais =   gsub("Myanmar", "Birmania", prueba1$pais)
prueba1$pais =   gsub("Bosnia y Hercegovina", "Bosnia y Herzegovina", prueba1$pais)
prueba1$pais =   gsub("Botsuana", "Botswana", prueba1$pais)
prueba1$pais =   gsub("Brunei Darussalam", "Brunéi", prueba1$pais)
prueba1$pais =   gsub("Brunéi", "Brunei", prueba1$pais)
prueba1$pais =   gsub("Congo, República del", "Congo", prueba1$pais)
prueba1$pais =   gsub("Congo, República Democrática del", "República Democrática del Congo", prueba1$pais)
prueba1$pais =   gsub("Côte d'Ivoire", "Costa de Marfil", prueba1$pais)
prueba1$pais =   gsub("Corea, República Popular Democrática de", "Corea del Norte", prueba1$pais)
prueba1$pais =   gsub("Corea, República de", "Corea del Sur", prueba1$pais)
prueba1$pais =   gsub("Egipto, República Árabe de", "Egipto", prueba1$pais)
prueba1$pais =   gsub("Federación de Rusia", "Rusia", prueba1$pais)
prueba1$pais =   gsub("Fiyi", "Fiji", prueba1$pais)
prueba1$pais =   gsub("Hong Kong, Región Administrativa Especial", "Hong Kong", prueba1$pais)
prueba1$pais =   gsub("Irán, República Islámica del", "Irán", prueba1$pais)
prueba1$pais =   gsub("Kazajstán", "Kazajistán", prueba1$pais)
prueba1$pais =   gsub("Kenia", "Kenya", prueba1$pais)
prueba1$pais =   gsub("República Democrática Popular Lao", "Laos", prueba1$pais)
prueba1$pais =   gsub("Lesoto", "Lesotho", prueba1$pais)
prueba1$pais =   gsub("Macedonia del Norte", "Macedonia", prueba1$pais)
prueba1$pais =   gsub("República de Moldova", "Moldavia", prueba1$pais)
prueba1$pais =   gsub("Malaui", "Malawi", prueba1$pais)
prueba1$pais =   gsub("Nueva Zelandia", "Nueva Zelanda", prueba1$pais)
prueba1$pais =   gsub("Palaos", "Palau", prueba1$pais)
prueba1$pais =   gsub("Papua-Nueva Guinea", "Papua Nueva Guinea", prueba1$pais)
prueba1$pais =   gsub("República de Moldova", "Moldavia", prueba1$pais)
prueba1$pais =   gsub("República Árabe Siria", "Siria", prueba1$pais)
prueba1$pais =   gsub("Rwanda", "Ruanda", prueba1$pais)
prueba1$pais =   gsub("Timor-Leste", "Timor Oriental", prueba1$pais)
prueba1$pais =   gsub("Viet Nam", "Vietnam", prueba1$pais)
prueba1$pais =   gsub("Yemen, Rep. del", "Yemen", prueba1$pais)
prueba1$pais =   gsub("Viet Nam", "Vietnam", prueba1$pais)
prueba1$pais =   gsub("Zimbabue", "Zimbabwe", prueba1$pais)
prueba1$pais =   gsub("Kirguizistán", "Kirguistán", prueba1$pais)
prueba1$pais =   gsub("Bután", "Bhután", prueba1$pais)
prueba1$pais =   gsub("Suriname", "Surinam", prueba1$pais)
prueba1$pais =   gsub("Tanzanía", "Tanzania", prueba1$pais)
#JUNTAMOS LAS FILAS CON NOMBRES IGUALES

prueba2=group_by(prueba1, pais)%>% 
  summarize(tasa=max(tasa, na.rm = TRUE),
            code=max(code, na.rm= TRUE))

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA

## `summarise()` ungrouping output (override with `.groups` argument)

#CAMBIAMOS EL ORDEN Y NOMBRE FINAL
datadesempleofinal <- prueba2[c("pais", "code", "tasa")]
datadesempleofinal$tasa=  gsub("-Inf", NA, datadesempleofinal$tasa)

#Agregar otras variable sd egobernanza

regulatory quality

library(rio)
library(htmltab)

perro = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/258c45e7-1b68-4b8e-853d-a2554f1bb145_Data.csv"
regulatory = import(perro)

str(regulatory)

## 'data.frame':    219 obs. of  5 variables:
##  $ Country Name : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country Code : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ Series Name  : chr  "Regulatory Quality: Estimate" "Regulatory Quality: Estimate" "Regulatory Quality: Estimate" "Regulatory Quality: Estimate" ...
##  $ Series Code  : chr  "RQ.EST" "RQ.EST" "RQ.EST" "RQ.EST" ...
##  $ 2019 [YR2019]: chr  "-1.120555" "0.2743798" "-1.303379" "-0.2996051" ...

regulatory$`Country Name` = NULL
regulatory$`Series Code` = NULL
regulatory$`Series Name` = NULL
regulatory[regulatory == '..'] <- NA
regulatory$`2019 [YR2019]` = as.numeric(regulatory$`2019 [YR2019]`)
names(regulatory) = c("Code","Regulatory_quality")


regulatory=na.omit(regulatory)

control de la corrupción

gato= "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/51253f2e-7374-408f-8685-c729a64d043a_Data.csv"
control_co = import(gato)

str(control_co)

## 'data.frame':    219 obs. of  5 variables:
##  $ Country Name : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country Code : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ Series Name  : chr  "Control of Corruption: Estimate" "Control of Corruption: Estimate" "Control of Corruption: Estimate" "Control of Corruption: Estimate" ...
##  $ Series Code  : chr  "CC.EST" "CC.EST" "CC.EST" "CC.EST" ...
##  $ 2019 [YR2019]: chr  "-1.401076" "-0.5287576" "-0.6218498" "1.843883" ...

control_co$`Country Name` = NULL
control_co$`Series Code` = NULL
control_co$`Series Name` = NULL
control_co[control_co == '..'] <- NA
control_co$`2019 [YR2019]` = as.numeric(control_co$`2019 [YR2019]`)
names(control_co) = c("Code","control_co")
control_co=na.omit(control_co)

#Rule of law

AXA = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/a9249c7d-95ab-4618-9160-3a247dea2bae_Data.csv"
ruleof = import(AXA)
str(ruleof)

## 'data.frame':    219 obs. of  5 variables:
##  $ Country Name : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country Code : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ Series Name  : chr  "Rule of Law: Estimate" "Rule of Law: Estimate" "Rule of Law: Estimate" "Rule of Law: Estimate" ...
##  $ Series Code  : chr  "RL.EST" "RL.EST" "RL.EST" "RL.EST" ...
##  $ 2019 [YR2019]: chr  "-1.713527" "-0.4111794" "-0.8154638" "1.335098" ...

ruleof$`Country Name` = NULL
ruleof$`Series Code` = NULL
ruleof$`Series Name` = NULL
ruleof[ruleof == '..'] <- NA
ruleof[2] = lapply(ruleof[2], as.numeric)
names(ruleof) = c("Code","Ruleoflaw")

ruleof=na.omit(ruleof)

#Insertando Voice and accountability

VA = 'https://github.com/AriannaNKZC/Estad-2/raw/master/Voice_and_accountability.csv'
VocA = import(VA)

str(VocA)

## 'data.frame':    219 obs. of  5 variables:
##  $ Country Name : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country Code : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ Series Name  : chr  "Voice and Accountability: Estimate" "Voice and Accountability: Estimate" "Voice and Accountability: Estimate" "Voice and Accountability: Estimate" ...
##  $ Series Code  : chr  "VA.EST" "VA.EST" "VA.EST" "VA.EST" ...
##  $ 2019 [YR2019]: chr  "-0.9880323" "0.1518047" "-1.037679" ".." ...

VocA$`Country Name` = NULL
VocA$`Series Code` = NULL
VocA$`Series Name` = NULL
VocA[VocA == '..'] <- NA
VocA[2] = lapply(VocA[2], as.numeric)
names(VocA) = c("Code","Voice_acco")
VocA=na.omit(VocA)

#Insertando Political stability

PS='https://github.com/AriannaNKZC/Estad-2/raw/master/e0757e7a-8829-44d2-a7a3-11a580c19a53_Data.csv'
PolS = import(PS)
str(PolS)

## 'data.frame':    219 obs. of  5 variables:
##  $ Country Name : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country Code : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ Series Name  : chr  "Political Stability and Absence of Violence/Terrorism: Estimate" "Political Stability and Absence of Violence/Terrorism: Estimate" "Political Stability and Absence of Violence/Terrorism: Estimate" "Political Stability and Absence of Violence/Terrorism: Estimate" ...
##  $ Series Code  : chr  "PV.EST" "PV.EST" "PV.EST" "PV.EST" ...
##  $ 2019 [YR2019]: chr  "-2.649407" "0.1185695" "-1.003575" "1.16038" ...

PolS$`Country Name` = NULL
PolS$`Series Code` = NULL
PolS$`Series Name` = NULL
PolS[PolS == '..'] <- NA
PolS[2] = lapply(PolS[2], as.numeric)
names(PolS) =c("Code","Political_sta")
PolS=na.omit(PolS)

elefante=merge(regulatory, control_co, by.x = "Code", by.y = "Code")
elefante=merge(elefante, ruleof, by.x = "Code", by.y = "Code")
elefante=merge(elefante, VocA, by.x = "Code", by.y = "Code")
elefante=merge(elefante, PolS, by.x = "Code", by.y = "Code")

####MERGE DE LAS 3 TABLAS

DATA1=merge(APOYOECO,datadensidad, by.x="code", by.y="code")
DATAFINAL=merge(DATA1,datadesempleofinal, by.x="code", by.y="code")
DATAFINAL = DATAFINAL[,c(1:4, 6, 8)]
names(DATAFINAL) = c("Code", "Fecha", "Pais", "Apoyo", "Densidad", "Desempleo")
DATAFINAL=DATAFINAL[!duplicated(DATAFINAL$Pais),]
str(DATAFINAL)

## 'data.frame':    152 obs. of  6 variables:
##  $ Code     : chr  "AFG" "AGO" "ALB" "AND" ...
##  $ Fecha    : Date, format: "2020-03-01" "2020-03-26" ...
##  $ Pais     : chr  "Afghanistan" "Angola" "Albania" "Andorra" ...
##  $ Apoyo    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Densidad : num  56.9 24.7 104.6 163.8 135.6 ...
##  $ Desempleo: chr  "24" "7" "14" "4" ...

DATAFINAL$Densidad=as.numeric(DATAFINAL$Densidad)
DATAFINAL$Desempleo=as.numeric(DATAFINAL$Desempleo)
DATAFINAL$Apoyo = as.factor(DATAFINAL$Apoyo)
levels(DATAFINAL$Apoyo) <- c("Sin apoyo", "Menos del 50% del sueldo", "Más del 50% del sueldo")
names(DATAFINAL) = c("Code", "Fecha (Dia 7 de cada pais)", "Pais", "Apoyo Economico", "Densidad (2018)", "Desempleo (% al 2019)")
str(DATAFINAL)

## 'data.frame':    152 obs. of  6 variables:
##  $ Code                      : chr  "AFG" "AGO" "ALB" "AND" ...
##  $ Fecha (Dia 7 de cada pais): Date, format: "2020-03-01" "2020-03-26" ...
##  $ Pais                      : chr  "Afghanistan" "Angola" "Albania" "Andorra" ...
##  $ Apoyo Economico           : Factor w/ 3 levels "Sin apoyo","Menos del 50% del sueldo",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Densidad (2018)           : num  56.9 24.7 104.6 163.8 135.6 ...
##  $ Desempleo (% al 2019)     : num  24 7 14 4 2 8 6 6 5 NA ...

#el grange merge

names(Tabla_Final)[1]="Code"
Tabla_Final$coPAIS = NULL
names(ruleof)[1]="Code"
str(esperanzita)

## 'data.frame':    155 obs. of  4 variables:
##  $ CODE    : chr  "AFG" "AGO" "ALB" "AND" ...
##  $ Country : chr  "Afganistán" "Angola" "Albania" "Andorra" ...
##  $ pobla   : num  38041754 31825295 2854191 77142 9770529 ...
##  $ promedio: num  3.18e-05 3.59e-05 5.29e-03 1.20e-01 8.19e-05 ...

names(dataFINAL)[1]="Code"
names(esperanzita)[1]="Code"

esperanzita$Country = NULL
names(dataFINAL)[3]="d7"
data=merge(Carla, Tabla_Final, by.x = "Code", by.y = "Code")
data=merge(data, dataFINAL, by.x = "Code", by.y = "Code")
data=merge(data, DATAFINAL, by.x = "Code", by.y = "Code")
data=merge(data, Prueba100, by.x = "Code", by.y = "Code") 
data=merge(data, elefante, by.x = "Code", by.y = "Code") 
data=merge(data, esperanzita, by.x = "Code", by.y = "Code")



#Contagiados al día30

Eliminamos lo que no sirve

names(data)

##  [1] "Code"                       "Country.x"                 
##  [3] "HDI"                        "EXPECTATIVAVIDA"           
##  [5] "EXPECTCOLE"                 "YEARS_SCHOOLING"           
##  [7] "GNI_GROSSNATIONALINCOME"    "infoalawk"                 
##  [9] "d7.x"                       "d100"                      
## [11] "%poburb18"                  "PAIS"                      
## [13] "GS_2017"                    "PPP_2018"                  
## [15] "StringencyIndex"            "d7.y"                      
## [17] "Indice"                     "Fecha (Dia 7 de cada pais)"
## [19] "Pais"                       "Apoyo Economico"           
## [21] "Densidad (2018)"            "Desempleo (% al 2019)"     
## [23] "Country.y"                  "Fecha"                     
## [25] "Valor"                      "Regulatory_quality"        
## [27] "control_co"                 "Ruleoflaw"                 
## [29] "Voice_acco"                 "Political_sta"             
## [31] "pobla"                      "promedio"

data=data[,c(-16,-18, -19, -23,-24)]

Renombramos

names(data)[2]="Country"
names(data)[9]="d7"
names(data)[20]="Contagd100"

Cuántos na’s hay

data[!complete.cases(data),] #13 Valores perdidos!

data = data[complete.cases(data),] #Los eliminamos

#Tercer entregable

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ forcats 0.5.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::between()         masks data.table::between()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x dplyr::first()           masks data.table::first()
## x purrr::flatten()         masks jsonlite::flatten()
## x data.table::hour()       masks lubridate::hour()
## x lubridate::intersect()   masks base::intersect()
## x data.table::isoweek()    masks lubridate::isoweek()
## x dplyr::lag()             masks stats::lag()
## x dplyr::last()            masks data.table::last()
## x data.table::mday()       masks lubridate::mday()
## x data.table::minute()     masks lubridate::minute()
## x data.table::month()      masks lubridate::month()
## x data.table::quarter()    masks lubridate::quarter()
## x data.table::second()     masks lubridate::second()
## x lubridate::setdiff()     masks base::setdiff()
## x purrr::transpose()       masks data.table::transpose()
## x lubridate::union()       masks base::union()
## x data.table::wday()       masks lubridate::wday()
## x data.table::week()       masks lubridate::week()
## x data.table::yday()       masks lubridate::yday()
## x data.table::year()       masks lubridate::year()

library(DescTools)
library(readxl)
library(foreign)
library(descr)
library(DescTools)
library(haven)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:purrr':
## 
##     some

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:DescTools':
## 
##     Recode

library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:car':
## 
##     logit

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following objects are masked from 'package:DescTools':
## 
##     AUC, ICC, SD

library(PMCMRplus)
library(Rmisc)

## Loading required package: lattice

## Loading required package: plyr

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:purrr':
## 
##     compact

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

str(data)

## 'data.frame':    128 obs. of  27 variables:
##  $ Code                   : chr  "AFG" "AGO" "AND" "ARE" ...
##  $ Country                : chr  "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : int  2 1 0 0 2 2 2 2 2 2 ...
##  $ d7                     : Date, format: "2020-03-01" "2020-03-26" ...
##  $ d100                   : Date, format: "2020-06-02" "2020-06-27" ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ PAIS                   : chr  "Afganistan" "Angola" "Andorra" "Emiratos Arabes Unidos" ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Indice                 : num  -1.457 -1.052 1.945 1.431 0.026 ...
##  $ Apoyo Economico        : Factor w/ 3 levels "Sin apoyo","Menos del 50% del sueldo",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : num  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagd100             : int  16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ pobla                  : num  38041754 31825295 77142 9770529 44938712 ...
##  $ promedio               : num  3.18e-05 3.59e-05 1.20e-01 8.19e-05 5.62e-04 ...

data = data[!duplicated(data),]
scale_y_continuous(labels = scales::comma)

## <ScaleContinuousPosition>
##  Range:  
##  Limits:    0 --    1

#row.names(data) = data$Country
#data$Country = NULL #Elimino country porque ya está como row name y el Code solo sirve para el merge

Reconfigurando variables

#arreglando las númericas

data$Contagd100 = as.numeric(data$Contagd100)
data$`Desempleo (% al 2019)`  = as.numeric(data$`Desempleo (% al 2019)`) 
table(data$`Apoyo Economico`)

## 
##                Sin apoyo Menos del 50% del sueldo   Más del 50% del sueldo 
##                      117                        7                        0

#Arreglando las ordinales

data$`Apoyo Economico` = as.ordered(data$`Apoyo Economico`)
str(data$`Apoyo Economico`)

##  Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...

str(data$infoalawk)

##  int [1:124] 2 1 0 0 2 2 2 2 2 2 ...

data$infoalawk = as.ordered(data$infoalawk)
levels(data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
table(data$infoalawk)

## 
##               Ninguna Campañas del gobierno   Campañas integrales 
##                    18                    19                    87

#POSTER

library(tidyverse)
library(DescTools)
library(readxl)
library(foreign)
library(descr)
library(DescTools)
library(haven)
library(car)
library(psych)
library(PMCMRplus)
library(Rmisc)
library(htmltab)
library(stringr)
library(polycor)

## 
## Attaching package: 'polycor'

## The following object is masked from 'package:psych':
## 
##     polyserial

library(ggcorrplot)
library(psych)
library(matrixcalc)
library(GPArotation)
library(plotly)

## 
## Attaching package: 'plotly'

## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:rio':
## 
##     export

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(fpc)
library(cluster)
library(dbscan)

## 
## Attaching package: 'dbscan'

## The following object is masked from 'package:fpc':
## 
##     dbscan

library(BBmisc)

## 
## Attaching package: 'BBmisc'

## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse

## The following object is masked from 'package:DescTools':
## 
##     %nin%

## The following object is masked from 'package:base':
## 
##     isFALSE

library(dplyr)

library(haven)
library(jtools)

## 
## Attaching package: 'jtools'

## The following object is masked from 'package:BBmisc':
## 
##     %nin%

## The following object is masked from 'package:DescTools':
## 
##     %nin%

str(data)

## 'data.frame':    124 obs. of  27 variables:
##  $ Code                   : chr  "AFG" "AGO" "AND" "ARE" ...
##  $ Country                : chr  "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
##  $ d7                     : Date, format: "2020-03-01" "2020-03-26" ...
##  $ d100                   : Date, format: "2020-06-02" "2020-06-27" ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ PAIS                   : chr  "Afganistan" "Angola" "Andorra" "Emiratos Arabes Unidos" ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Indice                 : num  -1.457 -1.052 1.945 1.431 0.026 ...
##  $ Apoyo Economico        : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : num  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagd100             : num  16509 259 852 16240 25987 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ pobla                  : num  38041754 31825295 77142 9770529 44938712 ...
##  $ promedio               : num  3.18e-05 3.59e-05 1.20e-01 8.19e-05 5.62e-04 ...

data$Code = NULL
data$d7 = NULL
data$d100 = NULL
data$`%pobrur18` = NULL
names(data)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "PAIS"                    "GS_2017"                
## [11] "PPP_2018"                "StringencyIndex"        
## [13] "Indice"                  "Apoyo Economico"        
## [15] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [17] "Contagd100"              "Regulatory_quality"     
## [19] "control_co"              "Ruleoflaw"              
## [21] "Voice_acco"              "Political_sta"          
## [23] "pobla"                   "promedio"

data$GEE = data$Indice
data$Indice = NULL
data$PAIS = NULL

data=data[c(1:105, 107:124),]

names(data)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "GS_2017"                 "PPP_2018"               
## [11] "StringencyIndex"         "Apoyo Economico"        
## [13] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [15] "Contagd100"              "Regulatory_quality"     
## [17] "control_co"              "Ruleoflaw"              
## [19] "Voice_acco"              "Political_sta"          
## [21] "pobla"                   "promedio"               
## [23] "GEE"

theData = data
names(data)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "GS_2017"                 "PPP_2018"               
## [11] "StringencyIndex"         "Apoyo Economico"        
## [13] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [15] "Contagd100"              "Regulatory_quality"     
## [17] "control_co"              "Ruleoflaw"              
## [19] "Voice_acco"              "Political_sta"          
## [21] "pobla"                   "promedio"               
## [23] "GEE"

theData = (data[, c(7, 11, 12, 16:20,23)])

table(theData$`Apoyo Economico`)

## 
##                Sin apoyo Menos del 50% del sueldo 
##                      116                        7

theData$infoalawk = NULL
theData$`Apoyo Economico`= NULL
theData$StringencyIndex = NULL
#theData$`Apoyo Economico` = as.numeric(theData$`Apoyo Economico`)
#theData$infoalawk = as.numeric(theData$infoalawk)
#theData$Voice_acco = NULL
str(theData)

## 'data.frame':    123 obs. of  6 variables:
##  $ Regulatory_quality: num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co        : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw         : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco        : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta     : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ GEE               : num  -1.457 -1.052 1.945 1.431 0.026 ...

#cambiando a nombres más bonitos

lapiz=polycor::hetcor(theData)$correlations

Explorar correlaciones:

ggcorrplot(lapiz)

#evaluandos ignificancia
ggcorrplot(lapiz,
          p.mat = cor_pmat(lapiz),
          insig = "blank",
          title = "Gráfico 1: Matriz de correlación")

verificar si los datos se pueden factorizar

psych::KMO(lapiz)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = lapiz)
## Overall MSA =  0.88
## MSA for each item = 
## Regulatory_quality         control_co          Ruleoflaw         Voice_acco 
##               0.84               0.89               0.88               0.89 
##      Political_sta                GEE 
##               0.96               0.86

Verificar si la matriz de correlaciones es adecuada

cortest.bartlett(lapiz,n=nrow(theData))$p.value>0.05

## [1] FALSE

library(matrixcalc)
is.singular.matrix(lapiz)

## [1] FALSE

determinar en cuantos factores o variables latentes podriamos redimensionar la data

fa.parallel(theData, fm = 'ML', fa = 'fa')

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA

Redimensionar a numero menor de factores

Resultado inicial:

mandarina <- fa(theData,nfactors = 1,cor = 'mixed',rotate ="varimax",fm="minres")

## mixed.cor is deprecated, please use mixedCor.
print(mandarina$loadings)

## 
## Loadings:
##                    MR1  
## Regulatory_quality 0.945
## control_co         0.955
## Ruleoflaw          0.984
## Voice_acco         0.795
## Political_sta      0.800
## GEE                0.956
## 
##                  MR1
## SS loadings    4.961
## Proportion Var 0.827

resultado visual

fa.diagram(mandarina, main = c("Gráfico 2: Árbol de factorización del primer modelo"))

Evaluando Resultado obtenido: ¿La Raíz del error cuadrático medio corregida está cerca a cero?

mandarina$crms

## [1] 0.03183335

¿La Raíz del error cuadrático medio de aproximación es menor a 0.05?

mandarina$RMSEA

##      RMSEA      lower      upper confidence 
##  0.2072691  0.1583127  0.2617740  0.9000000

¿El índice de Tucker-Lewis es mayor a 0.9?

mandarina$TLI

## [1] 0.9248696

¿Qué variables aportaron mas a los factores?

sort(mandarina$communality)

##         Voice_acco      Political_sta Regulatory_quality         control_co 
##          0.6325016          0.6405237          0.8922293          0.9126989 
##                GEE          Ruleoflaw 
##          0.9143820          0.9686808

¿Qué variables contribuyen a mas de un factor? #conviene que salga 1

sort(mandarina$complexity)

##          Ruleoflaw         Voice_acco      Political_sta         control_co 
##                  1                  1                  1                  1 
## Regulatory_quality                GEE 
##                  1                  1

factorial_casos<-as.data.frame(mandarina$scores) #en esta no me sale el factorial
head(factorial_casos)

summary(factorial_casos)

##       MR1         
##  Min.   :-1.8104  
##  1st Qu.:-0.7263  
##  Median :-0.2219  
##  Mean   : 0.0000  
##  3rd Qu.: 0.5883  
##  Max.   : 2.0378

agregandolo a la data

AJA=cbind(data[1],as.data.frame(mandarina$scores))

data$Gobernanza= normalize(AJA$MR1, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))

#{r} data$Medidas_tempranas=normalize(AJA$MR2, method = "range", margin=2, # by column range = c(0, 10)) #

data$Contagd100 = (data$Contagd100/data$pobla)*100
names(data)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "GS_2017"                 "PPP_2018"               
## [11] "StringencyIndex"         "Apoyo Economico"        
## [13] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [15] "Contagd100"              "Regulatory_quality"     
## [17] "control_co"              "Ruleoflaw"              
## [19] "Voice_acco"              "Political_sta"          
## [21] "pobla"                   "promedio"               
## [23] "GEE"                     "Gobernanza"

data_regre=data
names(data_regre)

##  [1] "Country"                 "HDI"                    
##  [3] "EXPECTATIVAVIDA"         "EXPECTCOLE"             
##  [5] "YEARS_SCHOOLING"         "GNI_GROSSNATIONALINCOME"
##  [7] "infoalawk"               "%poburb18"              
##  [9] "GS_2017"                 "PPP_2018"               
## [11] "StringencyIndex"         "Apoyo Economico"        
## [13] "Densidad (2018)"         "Desempleo (% al 2019)"  
## [15] "Contagd100"              "Regulatory_quality"     
## [17] "control_co"              "Ruleoflaw"              
## [19] "Voice_acco"              "Political_sta"          
## [21] "pobla"                   "promedio"               
## [23] "GEE"                     "Gobernanza"

#rownames(data_regre)=data$Country
data_regre$Country = NULL
str(data_regre)

## 'data.frame':    123 obs. of  23 variables:
##  $ HDI                    : num  0.496 0.575 0.857 0.866 0.83 ...
##  $ EXPECTATIVAVIDA        : num  64.5 60.8 81.8 77.8 76.5 ...
##  $ EXPECTCOLE             : num  10.1 11.8 13.3 13.6 17.6 ...
##  $ YEARS_SCHOOLING        : num  3.93 5.13 10.16 10.95 10.56 ...
##  $ GNI_GROSSNATIONALINCOME: num  1746 5555 48641 66912 17611 ...
##  $ infoalawk              : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
##  $ %poburb18              : num  25.5 65.5 88.1 86.5 91.9 ...
##  $ GS_2017                : num  11.78 2.79 10.32 3.33 9.12 ...
##  $ PPP_2018               : num  524 3290 41793 43839 11684 ...
##  $ StringencyIndex        : num  27.78 33.33 0 2.78 11.11 ...
##  $ Apoyo Economico        : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Densidad (2018)        : num  56.9 24.7 163.8 135.6 16.3 ...
##  $ Desempleo (% al 2019)  : num  24 7 4 2 8 6 6 5 7 1 ...
##  $ Contagd100             : num  0.043397 0.000814 1.104457 0.166214 0.057828 ...
##  $ Regulatory_quality     : num  -1.121 -0.894 1.228 0.979 -0.493 ...
##  $ control_co             : num  -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
##  $ Ruleoflaw              : num  -1.714 -1.054 1.58 0.84 -0.431 ...
##  $ Voice_acco             : num  -0.988 -0.777 1.139 -1.122 0.6 ...
##  $ Political_sta          : num  -2.649 -0.311 1.615 0.703 -0.12 ...
##  $ pobla                  : num  38041754 31825295 77142 9770529 44938712 ...
##  $ promedio               : num  3.18e-05 3.59e-05 1.20e-01 8.19e-05 5.62e-04 ...
##  $ GEE                    : num  -1.457 -1.052 1.945 1.431 0.026 ...
##  $ Gobernanza             : num  0.101 1.694 8.687 6.921 3.725 ...

names(data_regre)=c("IDH","EXPECTATIVAVIDA", "EXPECTCOLE", "añosEscol","RentaNacional", "Campañas informativas", "PoblacionUrbana", "GastoenSalud", "PBI per cápita (2018)", "Indice de Rigurosidad", "Apoyo Economico", "Densidad", "Desempleo (% al 2019)","Contagiados", "Regulatory quality", "control of corruption", "rule of law", "voice_acco", "political stability", "pobla", "promedio", "Indice de efectividad de la gobernanza","gobernanza")

MINARIS=formula(Contagiados~data_regre$EXPECTCOLE+data_regre$Densidad+data_regre$voice_acco+data_regre$RentaNacional +data_regre$PoblacionUrbana)
MINARISA=lm(MINARIS,data=data_regre)
summary(MINARISA)

## 
## Call:
## lm(formula = MINARIS, data = data_regre)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66663 -0.07317 -0.01813  0.04735  0.96256 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 2.378e-01  1.110e-01   2.143 0.034219 *  
## data_regre$EXPECTCOLE      -3.578e-02  9.477e-03  -3.776 0.000252 ***
## data_regre$Densidad         7.437e-05  7.216e-05   1.031 0.304850    
## data_regre$voice_acco      -3.590e-02  2.410e-02  -1.490 0.138916    
## data_regre$RentaNacional    1.232e-05  1.294e-06   9.522 2.93e-16 ***
## data_regre$PoblacionUrbana  2.408e-03  1.144e-03   2.105 0.037436 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1888 on 117 degrees of freedom
## Multiple R-squared:  0.5764, Adjusted R-squared:  0.5583 
## F-statistic: 31.84 on 5 and 117 DF,  p-value: < 2.2e-16

ewe=formula(Contagiados~data_regre$gobernanza+data_regre$Densidad+data_regre$RentaNacional+data_regre$EXPECTCOLE)
uwu=lm(ewe,data=data_regre)
summary(uwu)

## 
## Call:
## lm(formula = ewe, data = data_regre)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63850 -0.07487 -0.01371  0.03872  0.87638 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               2.737e-01  9.645e-02   2.838  0.00534 ** 
## data_regre$gobernanza    -3.812e-02  1.236e-02  -3.083  0.00255 ** 
## data_regre$Densidad       7.892e-05  7.051e-05   1.119  0.26527    
## data_regre$RentaNacional  1.574e-05  1.381e-06  11.393  < 2e-16 ***
## data_regre$EXPECTCOLE    -1.972e-02  9.072e-03  -2.173  0.03177 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1862 on 118 degrees of freedom
## Multiple R-squared:  0.5843, Adjusted R-squared:  0.5702 
## F-statistic: 41.46 on 4 and 118 DF,  p-value: < 2.2e-16

MINARISE=formula(Contagiados~data_regre$gobernanza+data_regre$PoblacionUrbana+data_regre$RentaNacional +data_regre$EXPECTCOLE)
MINARISEM=lm(MINARISE,data=data_regre)
summary(MINARISEM)

## 
## Call:
## lm(formula = MINARISE, data = data_regre)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65006 -0.07640 -0.02578  0.04720  0.88223 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 2.758e-01  9.503e-02   2.902  0.00442 ** 
## data_regre$gobernanza      -3.451e-02  1.249e-02  -2.762  0.00666 ** 
## data_regre$PoblacionUrbana  1.886e-03  1.137e-03   1.659  0.09970 .  
## data_regre$RentaNacional    1.478e-05  1.525e-06   9.689  < 2e-16 ***
## data_regre$EXPECTCOLE      -2.733e-02  9.939e-03  -2.749  0.00691 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1851 on 118 degrees of freedom
## Multiple R-squared:  0.5894, Adjusted R-squared:  0.5755 
## F-statistic: 42.35 on 4 and 118 DF,  p-value: < 2.2e-16

efe= formula(Contagiados~ + data_regre$gobernanza)
afa = lm(efe, data = data_regre)
summary(afa)

## 
## Call:
## lm(formula = efe, data = data_regre)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29868 -0.11923 -0.05979  0.01058  2.22610 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.020026   0.050827  -0.394 0.694272    
## data_regre$gobernanza  0.035575   0.009484   3.751 0.000272 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2699 on 121 degrees of freedom
## Multiple R-squared:  0.1042, Adjusted R-squared:  0.09676 
## F-statistic: 14.07 on 1 and 121 DF,  p-value: 0.0002718

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

Anovita=anova(MINARISA, uwu, MINARISEM)
stargazer(Anovita,type = 'text',summary = F,title = "Table de Análisis de Varianza")

## 
## Table de Análisis de Varianza
## =====================================
##   Res.Df  RSS  Df Sum of Sq F Pr(> F)
## -------------------------------------
## 1  117   4.169                       
## 2  118   4.092 -1   0.077            
## 3  118   4.041 0    0.051            
## -------------------------------------

stargazer(uwu, MINARISA, afa, MINARISEM,  type='text')

## 
## ===================================================================================================================
##                                                           Dependent variable:                                      
##                     -----------------------------------------------------------------------------------------------
##                                                               Contagiados                                          
##                               (1)                     (2)                     (3)                     (4)          
## -------------------------------------------------------------------------------------------------------------------
## gobernanza                 -0.038***                                       0.036***                -0.035***       
##                             (0.012)                                         (0.009)                 (0.012)        
##                                                                                                                    
## Densidad                    0.0001                  0.0001                                                         
##                            (0.0001)                (0.0001)                                                        
##                                                                                                                    
## voice_acco                                          -0.036                                                         
##                                                     (0.024)                                                        
##                                                                                                                    
## RentaNacional             0.00002***              0.00001***                                      0.00001***       
##                            (0.00000)               (0.00000)                                       (0.00000)       
##                                                                                                                    
## PoblacionUrbana                                     0.002**                                         0.002*         
##                                                     (0.001)                                         (0.001)        
##                                                                                                                    
## EXPECTCOLE                 -0.020**                -0.036***                                       -0.027***       
##                             (0.009)                 (0.009)                                         (0.010)        
##                                                                                                                    
## Constant                   0.274***                 0.238**                 -0.020                 0.276***        
##                             (0.096)                 (0.111)                 (0.051)                 (0.095)        
##                                                                                                                    
## -------------------------------------------------------------------------------------------------------------------
## Observations                  123                     123                     123                     123          
## R2                           0.584                   0.576                   0.104                   0.589         
## Adjusted R2                  0.570                   0.558                   0.097                   0.576         
## Residual Std. Error    0.186 (df = 118)        0.189 (df = 117)        0.270 (df = 121)        0.185 (df = 118)    
## F Statistic         41.456*** (df = 4; 118) 31.845*** (df = 5; 117) 14.070*** (df = 1; 121) 42.349*** (df = 4; 118)
## ===================================================================================================================
## Note:                                                                                   *p<0.1; **p<0.05; ***p<0.01

el_elegido = MINARISEM

MInarisa

library(ggpubr) #gráfico para ver normalidad

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:plyr':
## 
##     mutate

library(scatterplot3d)
library(stargazer)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

#LINEALIDAD
plot(el_elegido, 1, main = c("Gráfico 2: Linealidad"))  #diagonal, casi lineal

B. Homocedasticidad.

plot(el_elegido, 3, main = c("Gráfico 3: Homocedasticidad"))#diagonal

bptest(el_elegido) #valor P mayor a 0.05 Homocedasticidad

## 
##  studentized Breusch-Pagan test
## 
## data:  el_elegido
## BP = 55.086, df = 4, p-value = 3.117e-11

c. Normalidad de residuos. Puntos cerca de la diagonal.

plot(el_elegido, 2, main = c("Gráfico 4: Normalidad de residuos")) #se alejan de diagonal

shapiro.test(el_elegido$residuals) #menor a 0.05 el valor P entonces indica que no hay normaldiad de residusos

## 
##  Shapiro-Wilk normality test
## 
## data:  el_elegido$residuals
## W = 0.82612, p-value = 9.628e-11

VIF(el_elegido)

##      data_regre$gobernanza data_regre$PoblacionUrbana 
##                   3.691249                   2.228003 
##   data_regre$RentaNacional      data_regre$EXPECTCOLE 
##                   3.100896                   3.052885

5.2 ver valores influyentes Prestar atención al indice de Cook.

plot(el_elegido, 5, main = c("Gráfico 5: Identificación de valores influyentes"))

checkMINARISA=as.data.frame(influence.measures(el_elegido)$is.inf)

checkMINARISA[checkMINARISA$cook.d | checkMINARISA$hat,] #120, 124

#data_regre

R Notebook