library(readxl)
library(rio)
library(stringi)
library(htmltab)
library(jsonlite)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
library(stringr)
library(tidyr)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
data_covid= "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
esperanzita=import(data_covid)
I am sure I will regret this, amen.
esperanzita = esperanzita[,c(1,2,5:150)]
names(esperanzita) = c("PROVINCE","Country","A_1/22/20", "A_1/23/20", "A_1/24/20", "A_1/25/20", "A_1/26/20", "A_1/27/20", "A_1/28/20", "A_1/29/20", "A_1/30/20", "A_1/31/20", "A_2/1/20", "A_2/2/20", "A_2/3/20", "A_2/4/20", "A_2/5/20", "A_2/6/20", "A_2/7/20", "A_2/8/20", "A_2/9/20", "A_2/10/20", "A_2/11/20", "A_2/12/20", "A_2/13/20", "A_2/14/20", "A_2/15/20", "A_2/16/20", "A_2/17/20", "A_2/18/20", "A_2/19/20", "A_2/20/20", "A_2/21/20", "A_2/22/20", "A_2/23/20", "A_2/24/20", "A_2/25/20", "A_2/26/20", "A_2/27/20", "A_2/28/20", "A_2/29/20", "A_3/1/20", "A_3/2/20", "A_3/3/20", "A_3/4/20", "A_3/5/20", "A_3/6/20", "A_3/7/20", "A_3/8/20", "A_3/9/20", "A_3/10/20", "A_3/11/20", "A_3/12/20", "A_3/13/20", "A_3/14/20", "A_3/15/20", "A_3/16/20", "A_3/17/20", "A_3/18/20", "A_3/19/20", "A_3/20/20", "A_3/21/20", "A_3/22/20", "A_3/23/20", "A_3/24/20", "A_3/25/20", "A_3/26/20", "A_3/27/20", "A_3/28/20", "A_3/29/20", "A_3/30/20", "A_3/31/20", "A_4/1/20", "A_4/2/20", "A_4/3/20", "A_4/4/20", "A_4/5/20", "A_4/6/20", "A_4/7/20", "A_4/8/20", "A_4/9/20", "A_4/10/20", "A_4/11/20", "A_4/12/20", "A_4/13/20", "A_4/14/20", "A_4/15/20", "A_4/16/20", "A_4/17/20", "A_4/18/20", "A_4/19/20", "A_4/20/20", "A_4/21/20", "A_4/22/20", "A_4/23/20", "A_4/24/20", "A_4/25/20", "A_4/26/20", "A_4/27/20", "A_4/28/20", "A_4/29/20", "A_4/30/20", "A_5/1/20", "A_5/2/20", "A_5/3/20", "A_5/4/20", "A_5/5/20", "A_5/6/20", "A_5/7/20", "A_5/8/20", "A_5/9/20", "A_5/10/20", "A_5/11/20", "A_5/12/20", "A_5/13/20", "A_5/14/20", "A_5/15/20", "A_5/16/20", "A_5/17/20", "A_5/18/20", "A_5/19/20", "A_5/20/20", "A_5/21/20", "A_5/22/20", "A_5/23/20", "A_5/24/20", "A_5/25/20", "A_5/26/20", "A_5/27/20", "A_5/28/20", "A_5/29/20", "A_5/30/20", "A_5/31/20", "A_6/1/20", "A_6/2/20", "A_6/3/20", "A_6/4/20", "A_6/5/20", "A_6/6/20", "A_6/7/20", "A_6/8/20", "A_6/9/20", "A_6/10/20", "A_6/11/20", "A_6/12/20", "A_6/13/20", "A_6/14/20", "A_6/15/20")
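#Sketch: the 146 date names above can also be built programmatically, which avoids hand-typing slips
#(missing underscores, stray spaces). format() zero-pads month and day (e.g. "A_01/22/20"), but since the
#later steps pick these columns by position the exact label does not matter. `fechas` is an illustrative name.
fechas = seq(as.Date("2020-01-22"), as.Date("2020-06-15"), by = "day")
names(esperanzita) = c("PROVINCE", "Country", paste0("A_", format(fechas, "%m/%d/%y")))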
str(esperanzita)
## 'data.frame': 266 obs. of 148 variables:
## $ PROVINCE : chr "" "" "" "" ...
## $ Country : chr "Afghanistan" "Albania" "Algeria" "Andorra" ...
## $ A_1/22/20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ A_1/23/20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ A_1/24/20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ A_1/25/20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ A_1/26/20 : int 0 0 0 0 0 0 0 0 0 3 ...
## $ A_1/27/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_1/28/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_1/29/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_1/30/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_1/31/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/1/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/2/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/3/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/4/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/5/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/6/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A2/7/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A2/8/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/9/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/10/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/11/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/12/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/13/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/14/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/15/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/16/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/17/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/18/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/19/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/20/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/21/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/22/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/23/20 : int 0 0 0 0 0 0 0 0 0 4 ...
## $ A_2/24/20 : int 1 0 0 0 0 0 0 0 0 4 ...
## $ A_2/25/20 : int 1 0 1 0 0 0 0 0 0 4 ...
## $ A_2/26/20 : int 1 0 1 0 0 0 0 0 0 4 ...
## $ A_2/27/20 : int 1 0 1 0 0 0 0 0 0 4 ...
## $ A_2/28/20 : int 1 0 1 0 0 0 0 0 0 4 ...
## $ A_2/29/20 : int 1 0 1 0 0 0 0 0 0 4 ...
## $ A_3/1/20 : int 1 0 1 0 0 0 0 1 0 6 ...
## $ A_3/2/20 : int 1 0 3 1 0 0 0 1 0 6 ...
## $ A_3/3/20 : int 1 0 5 1 0 0 1 1 0 13 ...
## $ A_3/4/20 : int 1 0 12 1 0 0 1 1 0 22 ...
## $ A_3/5/20 : int 1 0 12 1 0 0 1 1 0 22 ...
## $ A_3/6/20 : int 1 0 17 1 0 0 2 1 0 26 ...
## $ A_3/7/20 : int 1 0 17 1 0 0 8 1 0 28 ...
## $ A_3/8/20 : int 4 0 19 1 0 0 12 1 0 38 ...
## $ A_3/9/20 : int 4 2 20 1 0 0 12 1 0 48 ...
## $ A_3/10/20 : int 5 10 20 1 0 0 17 1 0 55 ...
## $ A_3/11/20 : int 7 12 20 1 0 0 19 1 0 65 ...
## $ A_3/12/20 : int 7 23 24 1 0 0 19 4 0 65 ...
## $ A_3/13/20 : int 7 33 26 1 0 1 31 8 1 92 ...
## $ A_3/14/20 : int 11 38 37 1 0 1 34 18 1 112 ...
## $ A_3/15/20 : int 16 42 48 1 0 1 45 26 1 134 ...
## $ A_3/16/20 : int 21 51 54 2 0 1 56 52 2 171 ...
## $ A_3/17/20 : int 22 55 60 39 0 1 68 78 2 210 ...
## $ A_3/18/20 : int 22 59 74 39 0 1 79 84 3 267 ...
## $ A_3/19/20 : int 22 64 87 53 0 1 97 115 4 307 ...
## $ A_3/20/20 : int 24 70 90 75 1 1 128 136 6 353 ...
## $ A_3/21/20 : int 24 76 139 88 2 1 158 160 9 436 ...
## $ A_3/22/20 : int 40 89 201 113 2 1 266 194 19 669 ...
## $ A_3/23/20 : int 40 104 230 133 3 3 301 235 32 669 ...
## $ A_3/24/20 : int 74 123 264 164 3 3 387 249 39 818 ...
## $ A_3/25/20 : int 84 146 302 188 3 3 387 265 39 1029 ...
## $ A_3/26/20 : int 94 174 367 224 4 7 502 290 53 1219 ...
## $ A_3/27/20 : int 110 186 409 267 4 7 589 329 62 1405 ...
## $ A_3/28/20 : int 110 197 454 308 5 7 690 407 71 1617 ...
## $ A_3/29/20 : int 120 212 511 334 7 7 745 424 77 1791 ...
## $ A_3/30/20 : int 170 223 584 370 7 7 820 482 78 2032 ...
## $ A_3/31/20 : int 174 243 716 376 7 7 1054 532 80 2032 ...
## $ A_4/1/20 : int 237 259 847 390 8 7 1054 571 84 2182 ...
## $ A_4/2/20 : int 273 277 986 428 8 9 1133 663 87 2298 ...
## $ A_4/3/20 : int 281 304 1171 439 8 15 1265 736 91 2389 ...
## $ A_4/4/20 : int 299 333 1251 466 10 15 1451 770 93 2493 ...
## $ A_4/5/20 : int 349 361 1320 501 14 15 1451 822 96 2580 ...
## $ A_4/6/20 : int 367 377 1423 525 16 15 1554 833 96 2637 ...
## $ A_4/7/20 : int 423 383 1468 545 17 19 1628 853 96 2686 ...
## $ A_4/8/20 : int 444 400 1572 564 19 19 1715 881 99 2734 ...
## $ A_4/9/20 : int 484 409 1666 583 19 19 1795 921 100 2773 ...
## $ A_4/10/20 : int 521 416 1761 601 19 19 1975 937 103 2822 ...
## $ A_4/11/20 : int 555 433 1825 601 19 21 1975 967 103 2857 ...
## $ A_4/12/20 : int 607 446 1914 638 19 21 2142 1013 103 2857 ...
## $ A_4/13/20 : int 665 467 1983 646 19 23 2208 1039 102 2863 ...
## $ A_4/14/20 : int 714 475 2070 659 19 23 2277 1067 103 2870 ...
## $ A_4/15/20 : int 784 494 2160 673 19 23 2443 1111 103 2886 ...
## $ A_4/16/20 : int 840 518 2268 673 19 23 2571 1159 103 2897 ...
## $ A_4/17/20 : int 906 539 2418 696 19 23 2669 1201 103 2926 ...
## $ A_4/18/20 : int 933 548 2534 704 24 23 2758 1248 103 2936 ...
## $ A_4/19/20 : int 996 562 2629 713 24 23 2839 1291 103 2957 ...
## $ A_4/20/20 : int 1026 584 2718 717 24 23 2941 1339 104 2963 ...
## $ A_4/21/20 : int 1092 609 2811 717 24 23 3031 1401 104 2969 ...
## $ A_4/22/20 : int 1176 634 2910 723 25 24 3144 1473 104 2971 ...
## $ A_4/23/20 : int 1279 663 3007 723 25 24 3435 1523 104 2976 ...
## $ A_4/24/20 : int 1351 678 3127 731 25 24 3607 1596 105 2982 ...
## $ A_4/25/20 : int 1463 712 3256 738 25 24 3780 1677 106 2994 ...
## $ A_4/26/20 : int 1531 726 3382 738 26 24 3892 1746 106 3002 ...
## $ A_4/27/20 : int 1703 736 3517 743 27 24 4003 1808 106 3004 ...
## [list output truncated]
esperanzita[] <- lapply(esperanzita, as.character)
esperanzita <- data.frame(lapply(esperanzita, function(x) ifelse(!is.na(as.numeric(x)), as.numeric(x), x)))
## Warning in ifelse(!is.na(as.numeric(x)), as.numeric(x), x): NAs introduced by
## coercion
## Warning in ifelse(!is.na(as.numeric(x)), as.numeric(x), x): NAs introduced by
## coercion
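#Sketch of an alternative to the ifelse() coercion above that avoids the "NAs introduced by coercion" warnings:
#type.convert() turns all-numeric character columns back into numbers and leaves text columns such as PROVINCE and Country untouched.
esperanzita[] <- lapply(esperanzita, as.character)
esperanzita[] <- lapply(esperanzita, type.convert, as.is = TRUE)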
esperanzita[esperanzita == 0] <- NA
library(dplyr)
library(tidyr)
esperanzita = esperanzita %>%
gather(A_, val, 3:148) %>%
filter(!is.na(val)) %>%
group_by(Country) %>%
summarise(uno= first(val),
dos = nth(val, 2),
tres = nth(val, 3),
cuatro = nth(val, 4),
cinco = nth(val, 5),
seis = nth(val, 6),
siete = nth(val, 7),
ocho = nth(val, 8),
nueve = nth(val, 9),
diez = nth(val, 10),
once = nth(val, 11),
doce = nth(val, 12),
trece = nth(val, 13),
catorce = nth(val, 14),
quince = nth(val, 15),
dieciseis = nth(val, 16),
diecisite = nth(val, 17),
dieciocho = nth(val, 18),
diecinueve = nth(val, 19),
veinte = nth(val, 20),
veintiuno = nth(val, 21),
veintidos = nth(val, 22),
veintitres = nth(val, 23),
veinticuatro = nth(val, 24),
veinticinco = nth(val, 25),
veintiseis = nth(val, 26),
veintisiete = nth(val, 27),
veintocho = nth(val, 28),
veintinueve = nth(val, 29),
treinta = nth(val, 30))%>%
left_join(esperanzita, ., by = "Country")
## `summarise()` ungrouping output (override with `.groups` argument)
esperanzita = esperanzita[,-c(3:148)]
esperanzita=aggregate(cbind(uno,dos,tres,cuatro,cinco,seis,siete,ocho,nueve,diez,once,doce,trece,catorce,quince,dieciseis,diecisite,dieciocho,diecinueve, veinte, veintiuno,veintidos,veintitres,veinticuatro,veinticinco,veintiseis,veintisiete,veintocho,veintinueve,treinta) ~ Country,data = esperanzita,sum)
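#Sketch of a more compact route to the same "first 30 reported values per country" idea, starting from the wide
#table as originally imported (before the reshaping above); the uno..treinta layout can then be rebuilt with
#tidyr::spread()/pivot_wider(). `primeros30` is an illustrative name.
primeros30 = esperanzita %>%
gather(A_, val, 3:148) %>%
filter(!is.na(val)) %>%
group_by(Country) %>%
slice(1:30) %>%
mutate(dia = row_number()) %>%
ungroup()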
#to add the country code
hoja="https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/codigo.csv"
oto=import(hoja)
esperanzita=merge(oto,esperanzita, by.x = 'Country', by.y='Country') #could more country-level data be recovered?
names(esperanzita)
## [1] "Country" "CODE" "uno" "dos" "tres"
## [6] "cuatro" "cinco" "seis" "siete" "ocho"
## [11] "nueve" "diez" "once" "doce" "trece"
## [16] "catorce" "quince" "dieciseis" "diecisite" "dieciocho"
## [21] "diecinueve" "veinte" "veintiuno" "veintidos" "veintitres"
## [26] "veinticuatro" "veinticinco" "veintiseis" "veintisiete" "veintocho"
## [31] "veintinueve" "treinta"
esperanzita$Country = NULL
linkedin = "https://github.com/AriannaNKZC/Estad-2/raw/master/%C2%BFSera%20la%20data%3F.xls"
poblacion = import(linkedin)
poblacion = poblacion[,c(1,2,64)]
str(poblacion)
## 'data.frame': 264 obs. of 3 variables:
## $ Country Name: chr "Aruba" "Afganistán" "Angola" "Albania" ...
## $ Country Code: chr "ABW" "AFG" "AGO" "ALB" ...
## $ 2019 : num 106314 38041754 31825295 2854191 77142 ...
names(poblacion)= c("Country", "CODE", "pobla")
esperanzita=merge(poblacion,esperanzita, by.x = 'CODE', by.y='CODE')
#pobla is the full population count (persons), so no rescaling is needed
#the tedious part begins: per-capita conversions
esperanzita$uno = (esperanzita$uno/esperanzita$pobla)*100
esperanzita$dos = (esperanzita$dos/esperanzita$pobla)*100
esperanzita$tres = (esperanzita$tres/esperanzita$pobla)*100
esperanzita$cuatro = (esperanzita$cuatro/esperanzita$pobla)*100
esperanzita$cinco = (esperanzita$cinco/esperanzita$pobla)*100
esperanzita$seis = (esperanzita$seis/esperanzita$pobla)*100
esperanzita$siete = (esperanzita$siete/esperanzita$pobla)*100
esperanzita$ocho = (esperanzita$ocho/esperanzita$pobla)*100
esperanzita$nueve = (esperanzita$nueve/esperanzita$pobla)*100
esperanzita$diez = (esperanzita$diez/esperanzita$pobla)*100
esperanzita$once = (esperanzita$once/esperanzita$pobla)*100
esperanzita$doce = (esperanzita$doce/esperanzita$pobla)*100
esperanzita$trece = (esperanzita$trece/esperanzita$pobla)*100
esperanzita$catorce= (esperanzita$catorce/esperanzita$pobla)*100
esperanzita$quince = (esperanzita$quince/esperanzita$pobla)*100
esperanzita$dieciseis = (esperanzita$dieciseis/esperanzita$pobla)*100
esperanzita$diecisite = (esperanzita$diecisite/esperanzita$pobla)*100
esperanzita$dieciocho = (esperanzita$dieciocho/esperanzita$pobla)*100
esperanzita$diecinueve = (esperanzita$diecinueve/esperanzita$pobla)*100
esperanzita$veinte = (esperanzita$veinte/esperanzita$pobla)*100
esperanzita$veintiuno = (esperanzita$veintiuno/esperanzita$pobla)*100
esperanzita$veintidos = (esperanzita$veintidos/esperanzita$pobla)*100
esperanzita$veintitres = (esperanzita$veintitres/esperanzita$pobla)*100
esperanzita$veinticuatro = (esperanzita$veinticuatro/esperanzita$pobla)*100
esperanzita$veinticinco = (esperanzita$veinticinco/esperanzita$pobla)*100
esperanzita$veintiseis = (esperanzita$veintiseis/esperanzita$pobla)*100
esperanzita$veintisiete = (esperanzita$veintisiete/esperanzita$pobla)*100
esperanzita$veintocho = (esperanzita$veintocho/esperanzita$pobla)*100
esperanzita$veintinueve = (esperanzita$veintinueve/esperanzita$pobla)*100
esperanzita$treinta = (esperanzita$treinta/esperanzita$pobla)*100
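#Sketch: the 30 per-capita assignments above could be replaced by a single call
#(assumes dplyr >= 1.0 for across(); with older versions, mutate_at(vars(uno:treinta), ...) does the same).
esperanzita = esperanzita %>%
mutate(across(uno:treinta, ~ .x / pobla * 100))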
#sum and divide by 30 to get the average (equivalent to rowMeans(esperanzita[, 4:33]))
esperanzita$promedio = rowSums(esperanzita[ , 4:33])
esperanzita$promedio = (esperanzita$promedio/30)
esperanzita = esperanzita[,c(1,2,3,34)]
library(rio)
library(stringi)
library(htmltab)
library(jsonlite)
library(lubridate)
library(readr)
library(stringr)
library(tidyr)
library(data.table)
library(DescTools)
library(readxl)
data_salud <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
gasto_salud=import(data_salud)
names(gasto_salud)=(gasto_salud[1,])
gasto_salud = gasto_salud[-1,]
gasto_salud = gasto_salud[,c(1,2, 62)]
str(gasto_salud$`2017`)
## num [1:264] NA 11.78 2.79 NA 10.32 ...
names(gasto_salud) = c("PAIS", "CODE","GS_2017")
summary(gasto_salud)
## PAIS CODE GS_2017
## Length:264 Length:264 Min. : 1.181
## Class :character Class :character 1st Qu.: 4.534
## Mode :character Mode :character Median : 6.342
## Mean : 6.633
## 3rd Qu.: 8.196
## Max. :17.143
## NA's :79
#removing accents
gasto_salud$PAIS =stri_trans_general(gasto_salud$PAIS,"Latin-ASCII")
library(readxl)
data_ppp <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_NY.GDP.PCAP.CD_DS2_es_csv_v2_1347337.csv"
ppp_pib =import(data_ppp)
names(ppp_pib)=(ppp_pib[1,])
ppp_pib = ppp_pib[-1,]
ppp_pib = ppp_pib[,c(2,63)]
names(ppp_pib) = c("CODE", "PPP_2018")
linkfechas="https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/BASE_FECHA_INICIO.xlsx"
datafechas=import(linkfechas)
datafechas = datafechas[,c(1,2)]
names(datafechas) = c("COUNTRY","CODE")
Tabla_Final = merge(gasto_salud,ppp_pib,by.x='CODE', by.y='CODE')
link1="https://github.com/CarlosGDiez/BasesLimpias/raw/master/Gee_sucio.csv"
data1=import(link1) #this may be too heavy to run as a chunk... as a single line it causes no problems.
dim(data1)
## [1] 433 5
link2="https://github.com/CarlosGDiez/BasesLimpias/blob/master/Rigurosidad.csv?raw=true" #esto puede ser demasiado pesado para correrlo como Chunk... como linea individual no tiene problema.
data2=import(link2)
dim(data2)
## [1] 64829 44
#GEE
library(dplyr)
#Rename variables
names(data1)[1]="Country"
names(data1)[2]="CODE"
names(data1)[3]="Series"
#Filter to keep the GEE estimate and not the standard error
Prueba1=data1%>%
group_by(Country)%>%
mutate(Index = ifelse(Series==nth(Series,1), 1, 0))%>%
filter(Index==1)
#drop empty rows
Prueba1=Prueba1[-c(215,216,217,218,219),]
names(Prueba1)[5]="Indice"
Prueba1$Indice=parse_number(Prueba1$Indice)
## Warning: 5 parsing failures.
## row col expected actual
## 46 -- a number ..
## 129 -- a number ..
## 139 -- a number ..
## 144 -- a number ..
## 164 -- a number ..
#drop rows without values
Prueba1=Prueba1[-c(46,129,139,144,164),]
str(Prueba1$Indice)
## num [1:209] -1.457 0.115 -0.444 0.551 1.945 ...
## - attr(*, "problems")= tibble [5 × 4] (S3: tbl_df/tbl/data.frame)
## ..$ row : int [1:5] 46 129 139 144 164
## ..$ col : int [1:5] NA NA NA NA NA
## ..$ expected: chr [1:5] "a number" "a number" "a number" "a number" ...
## ..$ actual : chr [1:5] ".." ".." ".." ".." ...
Prueba1$Indice=as.numeric(Prueba1$Indice)
str(Prueba1$Indice)
## num [1:209] -1.457 0.115 -0.444 0.551 1.945 ...
Additional cleaning. It is useful to reduce everything to just the country code and the index.
Prueba1$Country=NULL
Prueba1$Series=NULL
Prueba1$Index=NULL
Prueba1$`Series Code`=NULL
Prueba1$std=NULL
#there do not appear to be notable differences
Cleaning
data3=data2 #copy the data to keep the original safe from changes
#rename
names(data3)[2]="CODE"
data3[6:34]=NULL
data3[7:15]=NULL
#we keep country because we will need it later
data3$RegionCode=NULL
data3$RegionName=NULL #we take measures at the country level, not the local level
data3$Date <- ymd(data3$Date)
Select so that only the stringency on the seventh day is taken.
#the first part of this comes from Jose Incio's data.
confirmed <- "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
WorldData<-import(file = confirmed)%>%
mutate(type="datacon")%>%
tidyr::gather(Fecha,Valor,-c(type,"Province/State",
"Country/Region",Lat,Long)) #gathering the separate date columns into one
Prueba2= WorldData%>%
filter(Valor>0)
names(Prueba2)[2]="Country"
str(Prueba2$Country)
## chr [1:59475] "China" "China" "China" "China" "China" "China" "China" ...
Prueba2$Country=as.factor(Prueba2$Country)
str(Prueba2$Fecha)
## chr [1:59475] "1/22/20" "1/22/20" "1/22/20" "1/22/20" "1/22/20" "1/22/20" ...
Prueba2$Fecha=mdy(Prueba2$Fecha)
Prueba2$Fecha=as.Date(Prueba2$Fecha)
#aggregating provinces into countries
Prueba2=aggregate(Valor
~ Country + Fecha,
data = Prueba2,
sum)
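#Equivalent dplyr sketch of the aggregate() call above (either form gives one row per country and date):
Prueba2 = Prueba2 %>%
group_by(Country, Fecha) %>%
summarise(Valor = sum(Valor), .groups = "drop")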
#problem: the data has country names but we need codes; a simple merge of countries and codes aligns them
data4=data3
data4$Date=NULL
data4$GovernmentResponseIndex=NULL
data4$RegionName=NULL
data4$StringencyIndex=NULL
names(data4)[1]="Country"
#drop duplicated countries. #note: this part was sometimes slow to read, or it used to be; now it runs fast
data4=data4[!duplicated(data4$Country),]
#now we have a base with only countries (the key) and their codes
#we will apply the merge later
##day calculations
#compute day 100
Prueba100=Prueba2%>%
group_by(Country)%>%
mutate(dia100= ifelse(Fecha==nth(Fecha,100),1,0))%>%
filter(dia100==1)
Prueba100=merge(Prueba100,data4, by.x="Country", by.y="Country")
Prueba100=Prueba100[,-4]
names(Prueba100)[4]="Code"
#Having done that, we repeat the step for day 7, which is the one we will use for stringency
Prueba7=Prueba2%>%
group_by(Country)%>%
mutate(dia7 = ifelse(Fecha==nth(Fecha,7), 1, 0))%>%
filter(dia7==1)
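#Equivalent sketch: keep each country's 7th reporting day directly (assumes Prueba2 is ordered by Fecha within
#Country, as above; countries with fewer than 7 reporting days drop out in both versions).
Prueba7 = Prueba2 %>%
arrange(Country, Fecha) %>%
group_by(Country) %>%
filter(row_number() == 7) %>%
ungroup()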
table(Prueba7$Fecha)
##
## 2020-01-28 2020-01-29 2020-01-30 2020-01-31 2020-02-01 2020-02-02 2020-02-04
## 6 2 1 2 2 3 2
## 2020-02-05 2020-02-06 2020-02-07 2020-02-10 2020-02-13 2020-02-20 2020-02-25
## 2 3 2 1 1 1 1
## 2020-02-27 2020-02-29 2020-03-01 2020-03-02 2020-03-03 2020-03-04 2020-03-05
## 2 1 5 5 6 4 6
## 2020-03-06 2020-03-07 2020-03-08 2020-03-09 2020-03-10 2020-03-11 2020-03-12
## 4 5 7 3 4 4 9
## 2020-03-13 2020-03-14 2020-03-15 2020-03-16 2020-03-17 2020-03-18 2020-03-19
## 1 5 3 3 6 2 7
## 2020-03-20 2020-03-21 2020-03-22 2020-03-23 2020-03-24 2020-03-25 2020-03-26
## 14 4 5 3 4 4 7
## 2020-03-27 2020-03-28 2020-03-29 2020-03-30 2020-03-31 2020-04-02 2020-04-03
## 2 5 1 2 3 1 1
## 2020-04-05 2020-04-06 2020-04-08 2020-04-11 2020-04-12 2020-04-16 2020-05-06
## 1 2 1 2 1 1 2
## 2020-05-19
## 1
dia7=merge(Prueba7,data4, by.x="Country", by.y="Country")
#now we can drop country from the original data
data3[1]=NULL
Now we create mergeable variables.
dia7$DIA7=paste(dia7$CODE,dia7$Fecha)
dia7$Country=NULL
dia7$dia7=NULL
dia7$CODE=NULL
table(dia7$DIA7)
##
## AFG 2020-03-01 AGO 2020-03-26 ALB 2020-03-15 AND 2020-03-08 ARE 2020-02-04
## 1 1 1 1 1
## ARG 2020-03-09 AUS 2020-02-01 AUT 2020-03-02 AZE 2020-03-07 BDI 2020-04-06
## 1 1 1 1 1
## BEL 2020-02-10 BEN 2020-03-22 BFA 2020-03-16 BGD 2020-03-14 BGR 2020-03-14
## 1 1 1 1 1
## BHR 2020-03-01 BIH 2020-03-11 BLR 2020-03-05 BLZ 2020-03-29 BOL 2020-03-17
## 1 1 1 1 1
## BRA 2020-03-03 BRB 2020-03-23 BRN 2020-03-15 BTN 2020-03-12 BWA 2020-04-05
## 1 1 1 1 1
## CAF 2020-03-21 CAN 2020-02-01 CHE 2020-03-02 CHL 2020-02-29 CHN 2020-01-28
## 1 1 1 1 1
## CIV 2020-03-17 CMR 2020-03-12 COL 2020-03-12 CRI 2020-03-12 CUB 2020-03-18
## 1 1 1 1 1
## CYP 2020-03-15 DEU 2020-02-02 DJI 2020-03-24 DMA 2020-03-28 DNK 2020-03-04
## 1 1 1 1 1
## DOM 2020-03-07 DZA 2020-03-02 ECU 2020-03-07 EGY 2020-02-20 ERI 2020-03-27
## 1 1 1 1 1
## ESP 2020-02-07 EST 2020-03-04 ETH 2020-03-19 FIN 2020-02-04 FJI 2020-03-25
## 1 1 1 1 1
## FRA 2020-01-30 GAB 2020-03-20 GBR 2020-02-06 GEO 2020-03-03 GHA 2020-03-20
## 1 1 1 1 1
## GIN 2020-03-19 GMB 2020-03-23 GRC 2020-03-03 GTM 2020-03-20 GUY 2020-03-18
## 1 1 1 1 1
## HND 2020-03-17 HRV 2020-03-02 HTI 2020-03-26 HUN 2020-03-10 IDN 2020-03-08
## 1 1 1 1 1
## IND 2020-02-05 IRL 2020-03-06 IRN 2020-02-25 IRQ 2020-03-01 ISL 2020-03-05
## 1 1 1 1 1
## ISR 2020-02-27 ITA 2020-02-06 JAM 2020-03-17 JOR 2020-03-09 JPN 2020-01-28
## 1 1 1 1 1
## KAZ 2020-03-19 KEN 2020-03-19 KHM 2020-02-02 KWT 2020-03-01 LAO 2020-03-30
## 1 1 1 1 1
## LBN 2020-02-27 LBR 2020-03-22 LBY 2020-03-30 LKA 2020-02-02 LSO 2020-05-19
## 1 1 1 1 1
## LTU 2020-03-05 LUX 2020-03-06 LVA 2020-03-08 MAR 2020-03-08 MDA 2020-03-14
## 1 1 1 1 1
## MDG 2020-03-26 MEX 2020-03-05 MLI 2020-03-31 MNG 2020-03-16 MOZ 2020-03-28
## 1 1 1 1 1
## MRT 2020-03-20 MUS 2020-03-24 MWI 2020-04-08 MYS 2020-01-31 NAM 2020-03-20
## 1 1 1 1 1
## NER 2020-03-26 NGA 2020-03-05 NIC 2020-03-25 NLD 2020-03-04 NOR 2020-03-03
## 1 1 1 1 1
## NPL 2020-01-31 NZL 2020-03-05 OMN 2020-03-01 PAK 2020-03-02 PAN 2020-03-16
## 1 1 1 1 1
## PER 2020-03-12 PHL 2020-02-05 PNG 2020-03-26 POL 2020-03-10 PRT 2020-03-08
## 1 1 1 1 1
## PRY 2020-03-14 QAT 2020-03-06 RKS 2020-03-20 ROU 2020-03-03 RUS 2020-02-06
## 1 1 1 1 1
## RWA 2020-03-20 SAU 2020-03-08 SDN 2020-03-19 SEN 2020-03-08 SGP 2020-01-29
## 1 1 1 1 1
## SLE 2020-04-06 SLV 2020-03-25 SMR 2020-03-04 SOM 2020-03-22 SRB 2020-03-12
## 1 1 1 1 1
## SSD 2020-04-11 SUR 2020-03-20 SVN 2020-03-11 SWE 2020-02-07 SWZ 2020-03-20
## 1 1 1 1 1
## SYC 2020-03-20 SYR 2020-03-28 TCD 2020-03-25 TGO 2020-03-12 THA 2020-01-28
## 1 1 1 1 1
## TJK 2020-05-06 TLS 2020-03-28 TTO 2020-03-20 TUN 2020-03-10 TUR 2020-03-17
## 1 1 1 1 1
## TZA 2020-03-22 UGA 2020-03-27 UKR 2020-03-09 URY 2020-03-19 UZB 2020-03-21
## 1 1 1 1 1
## VEN 2020-03-20 VNM 2020-01-29 YEM 2020-04-16 ZAF 2020-03-11 ZMB 2020-03-24
## 1 1 1 1 1
## ZWE 2020-03-26
## 1
data3$DIA7=paste(data3$CODE,data3$Date)
data3$Date=NULL
head(data3)
ResGob=merge(data3,dia7, by.x="DIA7", by.y = "DIA7")
#we drop values that are no longer needed, such as DIA7 itself (frustrating to delete it after so much effort).
ResGob$DIA7=NULL
#Valor is no longer needed: it is part of the dependent variable, not of this independent one
ResGob$Valor=NULL
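#Alternative sketch: instead of building the pasted DIA7 key, merge() can match on code and date directly;
#this assumes data3 still has its CODE/Date columns and dia7 its CODE/Fecha columns, i.e. it would replace the
#paste()/drop steps above. `ResGob_alt` is an illustrative name.
ResGob_alt = merge(data3, dia7,
by.x = c("CODE", "Date"),
by.y = c("CODE", "Fecha"))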
str(ResGob$StringencyIndex)
## num [1:160] 27.78 33.33 81.48 0 2.78 ...
dataFINAL=merge(ResGob,Prueba1, by.x="CODE",by.y = "CODE")
#infolaw
infocamp = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/public-campaigns-covid.csv"
dataic=import(infocamp)
str(dataic$Date)
## IDate[1:46886], format: "2020-01-01" "2020-01-02" "2020-01-03" "2020-01-04" "2020-01-05" ...
names(dataic)[1]= "Country"
names(dataic)[3]= "Fecha"
dataic$DIA7=paste(dataic$Code, dataic$Fecha)
c7=merge(dataic, dia7, by.x="DIA7", by.y="DIA7")
c7=c7[,-c(1,4,7)]
names(c7)=c("Country", "Code", "infoalawk", "Fecha")
#Urban population: the percentage of a country's population living in urban areas
xurb = "https://raw.githubusercontent.com/CarlaMendozaE/Prueba/master/API_SP.URB.TOTL.IN.ZS_DS2_es_csv_v2_1347951.csv"
dataxurb=import(xurb)
names(dataxurb)=(dataxurb[1,])
dataxurb[,3:62]= NULL
dataxurb[,4:5]= NULL
names(dataxurb)[3]= "%poburb18"
dataxurb$'%poburb18'=round(dataxurb$'%poburb18', digits = 2)
dataxurb=dataxurb[c(-1,-61,-62,-63,-64,-65,-68,-73,-74,-95,-98,-102,-103,-104,-105,-107, -110,-128,-134,-135,-136,-139,-140,-142,-153,-156,-161,-170,-181,-191,-197,-198,-204,-215,-217,-218,-230,-231,-236,-238,-240,-241,-249),]
dataxurb$num=c(1:222)
rownames(dataxurb)=dataxurb[,4]
dataxurb[,4]= NULL
names(dataxurb)[2]= "Code"
names(dataxurb)[1]= "Country"
State capacity
#Human Development Index (HDI): an indicator that combines GDP, education, and life expectancy
LIDH="https://github.com/CarlaMendozaE/Prueba/blob/master/IDH.xlsx?raw=true"
IDH=import(LIDH)
IDH[,c(1,8,9)]=NULL
names(IDH)[2]= "HDI"
names(IDH)[3]= "EXPECTATIVAVIDA"
names(IDH)[4]= "EXPECTCOLE"
names(IDH)[5]= "YEARS_SCHOOLING"
names(IDH)[6]= "GNI_GROSSNATIONALINCOME"
IDH[,-1]=lapply(IDH[,-1], as.numeric)
## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH[, -1], as.numeric): NAs introduced by coercion
str(IDH)
## 'data.frame': 222 obs. of 6 variables:
## $ Country : chr "Norway" "Switzerland" "Ireland" "Germany" ...
## $ HDI : num 0.954 0.946 0.942 0.939 0.939 ...
## $ EXPECTATIVAVIDA : num 82.3 83.6 82.1 81.2 84.7 ...
## $ EXPECTCOLE : num 18.1 16.2 18.8 17.1 16.5 ...
## $ YEARS_SCHOOLING : num 12.6 13.4 12.5 14.1 12 ...
## $ GNI_GROSSNATIONALINCOME: num 68059 59375 55660 46946 60221 ...
IDH$HDI= as.numeric(IDH$HDI)
IDH$HDI=round(IDH$HDI, digits = 4)
#We drop leftover rows
IDH=IDH[c(-63,-118,-156,-193:-222),]
#We merge only with the countries we are interested in
IDH=merge(IDH,c7,by.x='Country', by.y='Country')
IDH=merge(IDH,Prueba100,by.x='Country', by.y='Country')
#Clean up
IDH=IDH[,-c(11:12)]
names(IDH)[7]="Code"
names(IDH)[9]="d7"
names(IDH)[10]="d100"
However, we want everything together in a single data frame, so we merge.
Carla=merge(IDH, dataxurb, by.x = "Code", by.y = "Code")
#Carla=merge(Carla, dataxrural, by.x = "Code", by.y = "Code")
#CLEAN UP
str(Carla)
## 'data.frame': 140 obs. of 12 variables:
## $ Code : chr "AFG" "AGO" "ALB" "AND" ...
## $ Country.x : chr "Afghanistan" "Angola" "Albania" "Andorra" ...
## $ HDI : num 0.496 0.575 0.791 0.857 0.866 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 78.5 81.8 77.8 ...
## $ EXPECTCOLE : num 10.1 11.8 15.2 13.3 13.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.05 10.16 10.95 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 12300 48641 66912 ...
## $ infoalawk : int 2 1 2 0 0 2 2 2 2 1 ...
## $ d7 : Date, format: "2020-03-01" "2020-03-26" ...
## $ d100 : Date, format: "2020-06-02" "2020-06-27" ...
## $ Country.y : chr "Afganistán" "Angola" "Albania" "Andorra" ...
## $ %poburb18 : num 25.5 65.5 60.3 88.1 86.5 ...
Carla=Carla[,-11]
names(Carla)[2]="Country"
####ECONOMIC SUPPORT: INITIAL CLEANING
#EXTRACT THE DATA
library(rio)
linkayuda="https://raw.githubusercontent.com/CarlosGDiez/BasesLimpias/master/Rigurosidad.csv"
dataayuda=import(linkayuda)
#DROP UNNECESSARY COLUMNS
dataayuda = dataayuda[,c(1:5, 21)]
#DROP THE SUBNATIONAL REGIONS (WE ONLY WANT COUNTRIES)
#USA
dataayuda <- dataayuda[-c(48601 :62640), ]
#UK
dataayuda <- dataayuda[-c(16741 :17820), ]
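#More robust sketch: keep only national-level rows by filtering on the region column before it is dropped,
#instead of the hard-coded row ranges above (assumes the file keeps an OxCGRT-style RegionName column among its first five columns).
dataayuda <- dataayuda[is.na(dataayuda$RegionName) | dataayuda$RegionName == "", ]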
#DROP THE REGION COLUMNS
dataayuda <- dataayuda[,-c(3, 4) ]
#SIMPLIFY THE NAMES
names(dataayuda) = c("pais", "code", "fecha", "apoyo")
#CONVERT COLUMN 3 TO DATES
dataayuda[ , 3 ] <- ymd(dataayuda[, 3])
DAY 1 BASE
#CONFIRMED CASES
confirmed <- "https://github.com/CarlosGDiez/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
dataconfirmados<-import(file = confirmed)%>%
mutate(type="datacon")%>%
tidyr::gather(Fecha,Valor,-c(type,"Province/State",
"Country/Region",Lat,Long))
dataconfirmados[ , 6] <- mdy(dataconfirmados[, 6])
prueba4= dataconfirmados%>%
filter(Valor>0)
#AGGREGATE THE PROVINCES INTO A SINGLE COUNTRY
names(prueba4)[2]="pais"
names(prueba4)[6]="fecha"
prueba4=aggregate(Valor # dependent variable
~ pais + fecha, # grouping level
data = prueba4, # data
sum) # operation
#ALIGN CODES AND COUNTRIES (DATA WITH ONLY COUNTRIES AND CODES)
datacode=dataayuda
datacode$fecha=NULL
datacode$apoyo=NULL
#DROP DUPLICATED COUNTRIES
datacode=datacode[!duplicated(datacode$pais),]
#DAY 7
dia7=prueba4%>%
group_by(pais)%>%
mutate(dia7 = ifelse(fecha==nth(fecha,7), 1, 0))%>%
filter(dia7==1)
str(dia7$fecha)
## Date[1:188], format: "2020-01-28" "2020-01-28" "2020-01-28" "2020-01-28" "2020-01-28" ...
dia7$fecha=as.Date(dia7$fecha)
#DAY 7 AND MERGE WITH THE CODES
dia7_final=merge(dia7,datacode, by.x="pais", by.y="pais")
WE JOIN THE DAYS WITH THE DATA FOR EACH DAY
#CREATE THE DAY-7 ID FROM CODE AND DATE
dia7_final$DIA7=paste(dia7_final$code,dia7_final$fecha)
dia7_final$pais=NULL
dia7_final$dia7=NULL
dia7_final$code=NULL
dataayuda$DIA7=paste(dataayuda$code,dataayuda$fecha)
dataayuda$fecha=NULL
head(dataayuda)
#ADD THE ECONOMIC SUPPORT DATA
APOYOECO=merge(dia7_final,dataayuda, by.x="DIA7", by.y = "DIA7")
APOYOECO$DIA7=NULL
APOYOECO$Valor=NULL
####POPULATION DENSITY: EXTRACTION AND INITIAL CLEANING
#EXTRACT THE DATA
linkdensidad="https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/Excel%20densidad.xlsx.xls"
datadensidad=import(linkdensidad)
## New names:
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...
#DROP THE UNNECESSARY LEADING ROWS
datadensidad <- datadensidad[-c(1, 2), ]
#THE FIRST ROW BECOMES THE HEADER
names(datadensidad) <- as.matrix(datadensidad[1, ])
datadensidad <- datadensidad[-1, ]
datadensidad[] <- lapply(datadensidad, function(x) type.convert(as.character(x)))
#DROP THE UNNECESSARY COLUMNS
datadensidad = datadensidad[,c(1, 2, 63)]
#SIMPLIFY THE COLUMN NAMES
names(datadensidad) = c("pais", "code", "2018")
DEEPER CLEANING
#SORT THE DATA ALPHABETICALLY
prueba3 <- datadensidad[order(datadensidad$pais),]
rownames(prueba3)<-c(1:264)
#DROP THE UNNECESSARY ROWS
prueba3 <- prueba3[-c(8, 9, 38, 40, 41, 61:64, 73:77, 81, 99, 100, 104, 106:109, 130:133, 135, 142:144, 158:161, 178, 182, 183, 185, 186, 196, 197, 215, 219, 220, 228:230, 253, 261), ]
#FINAL NAME
datadensidadfinal<-prueba3
####UNEMPLOYMENT RATE
#EXTRACT THE DATA
datadesempleo <- "https://github.com/MariaJoseVega/Trabajo-grupal-2020.2/raw/master/datadesempleooriginal.csv"
datadesempleo=import(datadesempleo)
#SIMPLIFY THE NAMES
names(datadesempleo) = c("pais", "tasa")
#SORT THE DATA ALPHABETICALLY
datadesempleo <- datadesempleo[order(datadesempleo$pais),]
rownames(datadesempleo)<-c(1:187)
COUNTRY DATA (TO INCLUDE THE COUNTRY CODES)
data_salud <- "https://raw.githubusercontent.com/AriannaNKZC/TrabajoGrupal/bases-de-datos/API_SH.XPD.CHEX.GD.ZS_DS2_es_csv_v2_1347692.csv"
gasto_salud=import(data_salud)
gasto_salud = gasto_salud[,c(1, 2)]
gasto_salud = gasto_salud[-c(1),]
names(gasto_salud) = c("pais", "code")
#MERGE (TO ADD THE CODES)
prueba1=merge(datadesempleo,gasto_salud,all.x=T,all.y=T)
PRUEBA 1: CLEANING UP THE MERGE
#DROP THE UNNECESSARY ROWS
prueba1 = prueba1[-c(1, 3:5, 8:11, 21:25, 28, 43, 86, 93, 94, 99:102, 108, 131: 134, 190, 192, 191, 198, 206, 212:215, 217:220, 228, 233, 234, 237, 246, 251, 252, 266, 267, 287, 288, 295, 308),]
#RENAME COUNTRIES
prueba1$pais = gsub("Arabia Saudita", "Arabia Saudí", prueba1$pais)
prueba1$pais = gsub("Bahráin", "Bahrein", prueba1$pais)
prueba1$pais = gsub("Belarús", "Bielorrusia", prueba1$pais)
prueba1$pais = gsub("Benín", "Benin", prueba1$pais)
prueba1$pais = gsub("Birmania; Myanmar", "Birri", prueba1$pais)
prueba1$pais = gsub("Birri", "Birmania", prueba1$pais)
prueba1$pais = gsub("Myanmar", "Birmania", prueba1$pais)
prueba1$pais = gsub("Bosnia y Hercegovina", "Bosnia y Herzegovina", prueba1$pais)
prueba1$pais = gsub("Botsuana", "Botswana", prueba1$pais)
prueba1$pais = gsub("Brunei Darussalam", "Brunéi", prueba1$pais)
prueba1$pais = gsub("Brunéi", "Brunei", prueba1$pais)
prueba1$pais = gsub("Congo, República del", "Congo", prueba1$pais)
prueba1$pais = gsub("Congo, República Democrática del", "República Democrática del Congo", prueba1$pais)
prueba1$pais = gsub("Côte d'Ivoire", "Costa de Marfil", prueba1$pais)
prueba1$pais = gsub("Corea, República Popular Democrática de", "Corea del Norte", prueba1$pais)
prueba1$pais = gsub("Corea, República de", "Corea del Sur", prueba1$pais)
prueba1$pais = gsub("Egipto, República Árabe de", "Egipto", prueba1$pais)
prueba1$pais = gsub("Federación de Rusia", "Rusia", prueba1$pais)
prueba1$pais = gsub("Fiyi", "Fiji", prueba1$pais)
prueba1$pais = gsub("Hong Kong, Región Administrativa Especial", "Hong Kong", prueba1$pais)
prueba1$pais = gsub("Irán, República Islámica del", "Irán", prueba1$pais)
prueba1$pais = gsub("Kazajstán", "Kazajistán", prueba1$pais)
prueba1$pais = gsub("Kenia", "Kenya", prueba1$pais)
prueba1$pais = gsub("República Democrática Popular Lao", "Laos", prueba1$pais)
prueba1$pais = gsub("Lesoto", "Lesotho", prueba1$pais)
prueba1$pais = gsub("Macedonia del Norte", "Macedonia", prueba1$pais)
prueba1$pais = gsub("República de Moldova", "Moldavia", prueba1$pais)
prueba1$pais = gsub("Malaui", "Malawi", prueba1$pais)
prueba1$pais = gsub("Nueva Zelandia", "Nueva Zelanda", prueba1$pais)
prueba1$pais = gsub("Palaos", "Palau", prueba1$pais)
prueba1$pais = gsub("Papua-Nueva Guinea", "Papua Nueva Guinea", prueba1$pais)
prueba1$pais = gsub("República de Moldova", "Moldavia", prueba1$pais)
prueba1$pais = gsub("República Árabe Siria", "Siria", prueba1$pais)
prueba1$pais = gsub("Rwanda", "Ruanda", prueba1$pais)
prueba1$pais = gsub("Timor-Leste", "Timor Oriental", prueba1$pais)
prueba1$pais = gsub("Viet Nam", "Vietnam", prueba1$pais)
prueba1$pais = gsub("Yemen, Rep. del", "Yemen", prueba1$pais)
prueba1$pais = gsub("Viet Nam", "Vietnam", prueba1$pais)
prueba1$pais = gsub("Zimbabue", "Zimbabwe", prueba1$pais)
prueba1$pais = gsub("Kirguizistán", "Kirguistán", prueba1$pais)
prueba1$pais = gsub("Bután", "Bhután", prueba1$pais)
prueba1$pais = gsub("Suriname", "Surinam", prueba1$pais)
prueba1$pais = gsub("Tanzanía", "Tanzania", prueba1$pais)
#COMBINE ROWS WITH IDENTICAL NAMES
prueba2=group_by(prueba1, pais)%>%
summarize(tasa=max(tasa, na.rm = TRUE),
code=max(code, na.rm= TRUE))
## Warning in max(tasa, na.rm = TRUE): no non-missing arguments to max; returning -Inf
## (the warning above is repeated 34 times, once per country group with no non-missing `tasa`)
## Warning in max(code, na.rm = TRUE): no non-missing arguments, returning NA
## (the warning above is repeated 8 times, once per country group with no non-missing `code`)
## `summarise()` ungrouping output (override with `.groups` argument)
#REORDER THE COLUMNS AND SET THE FINAL NAME
datadesempleofinal <- prueba2[c("pais", "code", "tasa")]
datadesempleofinal$tasa= gsub("-Inf", NA, datadesempleofinal$tasa)
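#Sketch: a small helper (illustrative name max_or_na) avoids the -Inf results and the warnings for groups where
#every value is missing, which also makes the gsub("-Inf", ...) step above unnecessary.
max_or_na = function(x) if (all(is.na(x))) NA else max(x, na.rm = TRUE)
prueba2 = group_by(prueba1, pais)%>%
summarize(tasa = max_or_na(tasa),
code = max_or_na(code))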
#Add other governance variables
library(rio)
library(htmltab)
perro = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/258c45e7-1b68-4b8e-853d-a2554f1bb145_Data.csv"
regulatory = import(perro)
str(regulatory)
## 'data.frame': 219 obs. of 5 variables:
## $ Country Name : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Country Code : chr "AFG" "ALB" "DZA" "ASM" ...
## $ Series Name : chr "Regulatory Quality: Estimate" "Regulatory Quality: Estimate" "Regulatory Quality: Estimate" "Regulatory Quality: Estimate" ...
## $ Series Code : chr "RQ.EST" "RQ.EST" "RQ.EST" "RQ.EST" ...
## $ 2019 [YR2019]: chr "-1.120555" "0.2743798" "-1.303379" "-0.2996051" ...
regulatory$`Country Name` = NULL
regulatory$`Series Code` = NULL
regulatory$`Series Name` = NULL
regulatory[regulatory == '..'] <- NA
regulatory$`2019 [YR2019]` = as.numeric(regulatory$`2019 [YR2019]`)
names(regulatory) = c("Code","Regulatory_quality")
regulatory=na.omit(regulatory)
gato= "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/51253f2e-7374-408f-8685-c729a64d043a_Data.csv"
control_co = import(gato)
str(control_co)
## 'data.frame': 219 obs. of 5 variables:
## $ Country Name : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Country Code : chr "AFG" "ALB" "DZA" "ASM" ...
## $ Series Name : chr "Control of Corruption: Estimate" "Control of Corruption: Estimate" "Control of Corruption: Estimate" "Control of Corruption: Estimate" ...
## $ Series Code : chr "CC.EST" "CC.EST" "CC.EST" "CC.EST" ...
## $ 2019 [YR2019]: chr "-1.401076" "-0.5287576" "-0.6218498" "1.843883" ...
control_co$`Country Name` = NULL
control_co$`Series Code` = NULL
control_co$`Series Name` = NULL
control_co[control_co == '..'] <- NA
control_co$`2019 [YR2019]` = as.numeric(control_co$`2019 [YR2019]`)
names(control_co) = c("Code","control_co")
control_co=na.omit(control_co)
#Rule of law
AXA = "https://raw.githubusercontent.com/AriannaNKZC/Estad-2/master/a9249c7d-95ab-4618-9160-3a247dea2bae_Data.csv"
ruleof = import(AXA)
str(ruleof)
## 'data.frame': 219 obs. of 5 variables:
## $ Country Name : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Country Code : chr "AFG" "ALB" "DZA" "ASM" ...
## $ Series Name : chr "Rule of Law: Estimate" "Rule of Law: Estimate" "Rule of Law: Estimate" "Rule of Law: Estimate" ...
## $ Series Code : chr "RL.EST" "RL.EST" "RL.EST" "RL.EST" ...
## $ 2019 [YR2019]: chr "-1.713527" "-0.4111794" "-0.8154638" "1.335098" ...
ruleof$`Country Name` = NULL
ruleof$`Series Code` = NULL
ruleof$`Series Name` = NULL
ruleof[ruleof == '..'] <- NA
ruleof[2] = lapply(ruleof[2], as.numeric)
names(ruleof) = c("Code","Ruleoflaw")
ruleof=na.omit(ruleof)
#Adding Voice and Accountability
VA = 'https://github.com/AriannaNKZC/Estad-2/raw/master/Voice_and_accountability.csv'
VocA = import(VA)
str(VocA)
## 'data.frame': 219 obs. of 5 variables:
## $ Country Name : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Country Code : chr "AFG" "ALB" "DZA" "ASM" ...
## $ Series Name : chr "Voice and Accountability: Estimate" "Voice and Accountability: Estimate" "Voice and Accountability: Estimate" "Voice and Accountability: Estimate" ...
## $ Series Code : chr "VA.EST" "VA.EST" "VA.EST" "VA.EST" ...
## $ 2019 [YR2019]: chr "-0.9880323" "0.1518047" "-1.037679" ".." ...
VocA$`Country Name` = NULL
VocA$`Series Code` = NULL
VocA$`Series Name` = NULL
VocA[VocA == '..'] <- NA
VocA[2] = lapply(VocA[2], as.numeric)
names(VocA) = c("Code","Voice_acco")
VocA=na.omit(VocA)
#Adding Political Stability
PS='https://github.com/AriannaNKZC/Estad-2/raw/master/e0757e7a-8829-44d2-a7a3-11a580c19a53_Data.csv'
PolS = import(PS)
str(PolS)
## 'data.frame': 219 obs. of 5 variables:
## $ Country Name : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Country Code : chr "AFG" "ALB" "DZA" "ASM" ...
## $ Series Name : chr "Political Stability and Absence of Violence/Terrorism: Estimate" "Political Stability and Absence of Violence/Terrorism: Estimate" "Political Stability and Absence of Violence/Terrorism: Estimate" "Political Stability and Absence of Violence/Terrorism: Estimate" ...
## $ Series Code : chr "PV.EST" "PV.EST" "PV.EST" "PV.EST" ...
## $ 2019 [YR2019]: chr "-2.649407" "0.1185695" "-1.003575" "1.16038" ...
PolS$`Country Name` = NULL
PolS$`Series Code` = NULL
PolS$`Series Name` = NULL
PolS[PolS == '..'] <- NA
PolS[2] = lapply(PolS[2], as.numeric)
names(PolS) =c("Code","Political_sta")
PolS=na.omit(PolS)
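#Sketch: the five governance indicators above are cleaned with identical steps, so a small helper
#(illustrative name clean_wgi) avoids the repetition; the column names are the ones shown by str() above.
clean_wgi = function(url, newname) {
  x = import(url)
  x = x[, c("Country Code", "2019 [YR2019]")]
  x[x == '..'] = NA
  x[[2]] = as.numeric(x[[2]])
  names(x) = c("Code", newname)
  na.omit(x)
}
#e.g. regulatory = clean_wgi(perro, "Regulatory_quality")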
elefante=merge(regulatory, control_co, by.x = "Code", by.y = "Code")
elefante=merge(elefante, ruleof, by.x = "Code", by.y = "Code")
elefante=merge(elefante, VocA, by.x = "Code", by.y = "Code")
elefante=merge(elefante, PolS, by.x = "Code", by.y = "Code")
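#Sketch: the chained merges above can be collapsed with Reduce().
elefante = Reduce(function(a, b) merge(a, b, by = "Code"),
list(regulatory, control_co, ruleof, VocA, PolS))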
####MERGE OF THE 3 TABLES
DATA1=merge(APOYOECO,datadensidad, by.x="code", by.y="code")
DATAFINAL=merge(DATA1,datadesempleofinal, by.x="code", by.y="code")
DATAFINAL = DATAFINAL[,c(1:4, 6, 8)]
names(DATAFINAL) = c("Code", "Fecha", "Pais", "Apoyo", "Densidad", "Desempleo")
DATAFINAL=DATAFINAL[!duplicated(DATAFINAL$Pais),]
str(DATAFINAL)
## 'data.frame': 152 obs. of 6 variables:
## $ Code : chr "AFG" "AGO" "ALB" "AND" ...
## $ Fecha : Date, format: "2020-03-01" "2020-03-26" ...
## $ Pais : chr "Afghanistan" "Angola" "Albania" "Andorra" ...
## $ Apoyo : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Densidad : num 56.9 24.7 104.6 163.8 135.6 ...
## $ Desempleo: chr "24" "7" "14" "4" ...
DATAFINAL$Densidad=as.numeric(DATAFINAL$Densidad)
DATAFINAL$Desempleo=as.numeric(DATAFINAL$Desempleo)
DATAFINAL$Apoyo = as.factor(DATAFINAL$Apoyo)
levels(DATAFINAL$Apoyo) <- c("Sin apoyo", "Menos del 50% del sueldo", "Más del 50% del sueldo")
names(DATAFINAL) = c("Code", "Fecha (Dia 7 de cada pais)", "Pais", "Apoyo Economico", "Densidad (2018)", "Desempleo (% al 2019)")
str(DATAFINAL)
## 'data.frame': 152 obs. of 6 variables:
## $ Code : chr "AFG" "AGO" "ALB" "AND" ...
## $ Fecha (Dia 7 de cada pais): Date, format: "2020-03-01" "2020-03-26" ...
## $ Pais : chr "Afghanistan" "Angola" "Albania" "Andorra" ...
## $ Apoyo Economico : Factor w/ 3 levels "Sin apoyo","Menos del 50% del sueldo",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Densidad (2018) : num 56.9 24.7 104.6 163.8 135.6 ...
## $ Desempleo (% al 2019) : num 24 7 14 4 2 8 6 6 5 NA ...
#the grand merge
names(Tabla_Final)[1]="Code"
Tabla_Final$coPAIS = NULL
names(ruleof)[1]="Code"
str(esperanzita)
## 'data.frame': 155 obs. of 4 variables:
## $ CODE : chr "AFG" "AGO" "ALB" "AND" ...
## $ Country : chr "Afganistán" "Angola" "Albania" "Andorra" ...
## $ pobla : num 38041754 31825295 2854191 77142 9770529 ...
## $ promedio: num 3.18e-05 3.59e-05 5.29e-03 1.20e-01 8.19e-05 ...
names(dataFINAL)[1]="Code"
names(esperanzita)[1]="Code"
esperanzita$Country = NULL
names(dataFINAL)[3]="d7"
data=merge(Carla, Tabla_Final, by.x = "Code", by.y = "Code")
data=merge(data, dataFINAL, by.x = "Code", by.y = "Code")
data=merge(data, DATAFINAL, by.x = "Code", by.y = "Code")
data=merge(data, Prueba100, by.x = "Code", by.y = "Code")
data=merge(data, elefante, by.x = "Code", by.y = "Code")
data=merge(data, esperanzita, by.x = "Code", by.y = "Code")
#Infected as of day 100
We drop what is not needed.
names(data)
## [1] "Code" "Country.x"
## [3] "HDI" "EXPECTATIVAVIDA"
## [5] "EXPECTCOLE" "YEARS_SCHOOLING"
## [7] "GNI_GROSSNATIONALINCOME" "infoalawk"
## [9] "d7.x" "d100"
## [11] "%poburb18" "PAIS"
## [13] "GS_2017" "PPP_2018"
## [15] "StringencyIndex" "d7.y"
## [17] "Indice" "Fecha (Dia 7 de cada pais)"
## [19] "Pais" "Apoyo Economico"
## [21] "Densidad (2018)" "Desempleo (% al 2019)"
## [23] "Country.y" "Fecha"
## [25] "Valor" "Regulatory_quality"
## [27] "control_co" "Ruleoflaw"
## [29] "Voice_acco" "Political_sta"
## [31] "pobla" "promedio"
data=data[,c(-16,-18, -19, -23,-24)]
We rename.
names(data)[2]="Country"
names(data)[9]="d7"
names(data)[20]="Contagd100"
How many NAs are there?
data[!complete.cases(data),] #13 rows with missing values!
data = data[complete.cases(data),] #we drop them
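#Sketch: a quick way to see which columns drive the missingness (run on the data frame before the complete.cases() filter above).
sort(colSums(is.na(data)), decreasing = TRUE)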
#Third deliverable
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::between() masks data.table::between()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x purrr::flatten() masks jsonlite::flatten()
## x data.table::hour() masks lubridate::hour()
## x lubridate::intersect() masks base::intersect()
## x data.table::isoweek() masks lubridate::isoweek()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x data.table::mday() masks lubridate::mday()
## x data.table::minute() masks lubridate::minute()
## x data.table::month() masks lubridate::month()
## x data.table::quarter() masks lubridate::quarter()
## x data.table::second() masks lubridate::second()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::transpose() masks data.table::transpose()
## x lubridate::union() masks base::union()
## x data.table::wday() masks lubridate::wday()
## x data.table::week() masks lubridate::week()
## x data.table::yday() masks lubridate::yday()
## x data.table::year() masks lubridate::year()
library(DescTools)
library(readxl)
library(foreign)
library(descr)
library(DescTools)
library(haven)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:DescTools':
##
## Recode
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following objects are masked from 'package:DescTools':
##
## AUC, ICC, SD
library(PMCMRplus)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
str(data)
## 'data.frame': 128 obs. of 27 variables:
## $ Code : chr "AFG" "AGO" "AND" "ARE" ...
## $ Country : chr "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : int 2 1 0 0 2 2 2 2 2 2 ...
## $ d7 : Date, format: "2020-03-01" "2020-03-26" ...
## $ d100 : Date, format: "2020-06-02" "2020-06-27" ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ PAIS : chr "Afganistan" "Angola" "Andorra" "Emiratos Arabes Unidos" ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Indice : num -1.457 -1.052 1.945 1.431 0.026 ...
## $ Apoyo Economico : Factor w/ 3 levels "Sin apoyo","Menos del 50% del sueldo",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : num 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagd100 : int 16509 259 852 16240 25987 6847 16771 7876 53981 850 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ pobla : num 38041754 31825295 77142 9770529 44938712 ...
## $ promedio : num 3.18e-05 3.59e-05 1.20e-01 8.19e-05 5.62e-04 ...
data = data[!duplicated(data),]
scale_y_continuous(labels = scales::comma)
## <ScaleContinuousPosition>
## Range:
## Limits: 0 -- 1
#row.names(data) = data$Country
#data$Country = NULL #drop Country because it is already the row name and Code is only needed for the merge
Reconfiguring variables
#fixing the numeric variables
data$Contagd100 = as.numeric(data$Contagd100)
data$`Desempleo (% al 2019)` = as.numeric(data$`Desempleo (% al 2019)`)
table(data$`Apoyo Economico`)
##
## Sin apoyo Menos del 50% del sueldo Más del 50% del sueldo
## 117 7 0
#Fixing the ordinal variables
data$`Apoyo Economico` = as.ordered(data$`Apoyo Economico`)
str(data$`Apoyo Economico`)
## Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
str(data$infoalawk)
## int [1:124] 2 1 0 0 2 2 2 2 2 2 ...
data$infoalawk = as.ordered(data$infoalawk)
levels(data$infoalawk) = c("Ninguna", "Campañas del gobierno", "Campañas integrales")
table(data$infoalawk)
##
## Ninguna Campañas del gobierno Campañas integrales
## 18 19 87
#POSTER
library(tidyverse)
library(DescTools)
library(readxl)
library(foreign)
library(descr)
library(DescTools)
library(haven)
library(car)
library(psych)
library(PMCMRplus)
library(Rmisc)
library(htmltab)
library(stringr)
library(polycor)
##
## Attaching package: 'polycor'
## The following object is masked from 'package:psych':
##
## polyserial
library(ggcorrplot)
library(psych)
library(matrixcalc)
library(GPArotation)
library(plotly)
##
## Attaching package: 'plotly'
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:rio':
##
## export
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(fpc)
library(cluster)
library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:fpc':
##
## dbscan
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse
## The following object is masked from 'package:DescTools':
##
## %nin%
## The following object is masked from 'package:base':
##
## isFALSE
library(dplyr)
library(haven)
library(jtools)
##
## Attaching package: 'jtools'
## The following object is masked from 'package:BBmisc':
##
## %nin%
## The following object is masked from 'package:DescTools':
##
## %nin%
str(data)
## 'data.frame': 124 obs. of 27 variables:
## $ Code : chr "AFG" "AGO" "AND" "ARE" ...
## $ Country : chr "Afghanistan" "Angola" "Andorra" "United Arab Emirates" ...
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
## $ d7 : Date, format: "2020-03-01" "2020-03-26" ...
## $ d100 : Date, format: "2020-06-02" "2020-06-27" ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ PAIS : chr "Afganistan" "Angola" "Andorra" "Emiratos Arabes Unidos" ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Indice : num -1.457 -1.052 1.945 1.431 0.026 ...
## $ Apoyo Economico : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : num 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagd100 : num 16509 259 852 16240 25987 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ pobla : num 38041754 31825295 77142 9770529 44938712 ...
## $ promedio : num 3.18e-05 3.59e-05 1.20e-01 8.19e-05 5.62e-04 ...
data$Code = NULL
data$d7 = NULL
data$d100 = NULL
data$`%pobrur18` = NULL
names(data)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "PAIS" "GS_2017"
## [11] "PPP_2018" "StringencyIndex"
## [13] "Indice" "Apoyo Economico"
## [15] "Densidad (2018)" "Desempleo (% al 2019)"
## [17] "Contagd100" "Regulatory_quality"
## [19] "control_co" "Ruleoflaw"
## [21] "Voice_acco" "Political_sta"
## [23] "pobla" "promedio"
data$GEE = data$Indice
data$Indice = NULL
data$PAIS = NULL
data=data[c(1:105, 107:124),]
names(data)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "GS_2017" "PPP_2018"
## [11] "StringencyIndex" "Apoyo Economico"
## [13] "Densidad (2018)" "Desempleo (% al 2019)"
## [15] "Contagd100" "Regulatory_quality"
## [17] "control_co" "Ruleoflaw"
## [19] "Voice_acco" "Political_sta"
## [21] "pobla" "promedio"
## [23] "GEE"
theData = data
names(data)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "GS_2017" "PPP_2018"
## [11] "StringencyIndex" "Apoyo Economico"
## [13] "Densidad (2018)" "Desempleo (% al 2019)"
## [15] "Contagd100" "Regulatory_quality"
## [17] "control_co" "Ruleoflaw"
## [19] "Voice_acco" "Political_sta"
## [21] "pobla" "promedio"
## [23] "GEE"
theData = (data[, c(7, 11, 12, 16:20,23)])
table(theData$`Apoyo Economico`)
##
## Sin apoyo Menos del 50% del sueldo
## 116 7
# keep only the governance indicators for the factor analysis
theData$infoalawk = NULL
theData$`Apoyo Economico` = NULL   # highly unbalanced: 116 vs. 7 (see table above)
theData$StringencyIndex = NULL
#theData$`Apoyo Economico` = as.numeric(theData$`Apoyo Economico`)
#theData$infoalawk = as.numeric(theData$infoalawk)
#theData$Voice_acco = NULL
str(theData)
## 'data.frame': 123 obs. of 6 variables:
## $ Regulatory_quality: num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
# heterogeneous correlation matrix of the governance indicators
lapiz = polycor::hetcor(theData)$correlations
Explore the correlations:
ggcorrplot(lapiz)
# assessing the significance of the correlations
ggcorrplot(lapiz,
p.mat = cor_pmat(lapiz),
insig = "blank",
title = "Gráfico 1: Matriz de correlación")
psych::KMO(lapiz)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = lapiz)
## Overall MSA = 0.88
## MSA for each item =
## Regulatory_quality control_co Ruleoflaw Voice_acco
## 0.84 0.89 0.88 0.89
## Political_sta GEE
## 0.96 0.86
cortest.bartlett(lapiz, n = nrow(theData))$p.value > 0.05  # FALSE: Bartlett's test rejects an identity correlation matrix, so factoring is reasonable
## [1] FALSE
library(matrixcalc)
is.singular.matrix(lapiz)  # FALSE: the correlation matrix is not singular
## [1] FALSE
fa.parallel(theData, fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
mandarina <- fa(theData,nfactors = 1,cor = 'mixed',rotate ="varimax",fm="minres")
## mixed.cor is deprecated, please use mixedCor.
print(mandarina$loadings)
##
## Loadings:
## MR1
## Regulatory_quality 0.945
## control_co 0.955
## Ruleoflaw 0.984
## Voice_acco 0.795
## Political_sta 0.800
## GEE 0.956
##
## MR1
## SS loadings 4.961
## Proportion Var 0.827
fa.diagram(mandarina, main = c("Gráfico 2: Árbol de factorización del primer modelo"))
Evaluating the result: is the corrected root mean square of the residuals close to zero?
mandarina$crms
## [1] 0.03183335
Is the root mean square error of approximation (RMSEA) below 0.05?
mandarina$RMSEA
## RMSEA lower upper confidence
## 0.2072691 0.1583127 0.2617740 0.9000000
Is the Tucker-Lewis index (TLI) above 0.9?
mandarina$TLI
## [1] 0.9248696
Which variables contributed the most to the factor?
sort(mandarina$communality)
## Voice_acco Political_sta Regulatory_quality control_co
## 0.6325016 0.6405237 0.8922293 0.9126989
## GEE Ruleoflaw
## 0.9143820 0.9686808
Which variables contribute to more than one factor? (a complexity of 1 is desirable)
sort(mandarina$complexity)
## Ruleoflaw Voice_acco Political_sta control_co
## 1 1 1 1
## Regulatory_quality GEE
## 1 1
factorial_casos <- as.data.frame(mandarina$scores)  # factor scores for each case
head(factorial_casos)
summary(factorial_casos)
## MR1
## Min. :-1.8104
## 1st Qu.:-0.7263
## Median :-0.2219
## Mean : 0.0000
## 3rd Qu.: 0.5883
## Max. : 2.0378
AJA = cbind(data[1], as.data.frame(mandarina$scores))  # country names plus factor scores
data$Gobernanza = normalize(AJA$MR1,        # rescale the factor scores
                            method = "range",
                            margin = 2,     # by column
                            range = c(0, 10))  # to a 0-10 governance index
# (disabled chunk) a second factor, MR2, was originally rescaled here, but only one factor was extracted:
# data$Medidas_tempranas = normalize(AJA$MR2, method = "range", margin = 2, range = c(0, 10))
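For reference, the 0-10 rescaling performed by normalize() above is a plain min-max transformation; a base-R sketch (rescale_0_10 is a hypothetical helper, assuming MR1 has no missing values) would reproduce the same index:
# hypothetical base-R equivalent of the normalize() call above
rescale_0_10 <- function(x) 10 * (x - min(x)) / (max(x) - min(x))
# data$Gobernanza <- rescale_0_10(AJA$MR1)  # same 0-10 governance index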
data$Contagd100 = (data$Contagd100/data$pobla)*100  # cases at day 100 as a percentage of the population
names(data)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "GS_2017" "PPP_2018"
## [11] "StringencyIndex" "Apoyo Economico"
## [13] "Densidad (2018)" "Desempleo (% al 2019)"
## [15] "Contagd100" "Regulatory_quality"
## [17] "control_co" "Ruleoflaw"
## [19] "Voice_acco" "Political_sta"
## [21] "pobla" "promedio"
## [23] "GEE" "Gobernanza"
data_regre=data
names(data_regre)
## [1] "Country" "HDI"
## [3] "EXPECTATIVAVIDA" "EXPECTCOLE"
## [5] "YEARS_SCHOOLING" "GNI_GROSSNATIONALINCOME"
## [7] "infoalawk" "%poburb18"
## [9] "GS_2017" "PPP_2018"
## [11] "StringencyIndex" "Apoyo Economico"
## [13] "Densidad (2018)" "Desempleo (% al 2019)"
## [15] "Contagd100" "Regulatory_quality"
## [17] "control_co" "Ruleoflaw"
## [19] "Voice_acco" "Political_sta"
## [21] "pobla" "promedio"
## [23] "GEE" "Gobernanza"
#rownames(data_regre)=data$Country
data_regre$Country = NULL
str(data_regre)
## 'data.frame': 123 obs. of 23 variables:
## $ HDI : num 0.496 0.575 0.857 0.866 0.83 ...
## $ EXPECTATIVAVIDA : num 64.5 60.8 81.8 77.8 76.5 ...
## $ EXPECTCOLE : num 10.1 11.8 13.3 13.6 17.6 ...
## $ YEARS_SCHOOLING : num 3.93 5.13 10.16 10.95 10.56 ...
## $ GNI_GROSSNATIONALINCOME: num 1746 5555 48641 66912 17611 ...
## $ infoalawk : Ord.factor w/ 3 levels "Ninguna"<"Campañas del gobierno"<..: 3 2 1 1 3 3 3 3 3 3 ...
## $ %poburb18 : num 25.5 65.5 88.1 86.5 91.9 ...
## $ GS_2017 : num 11.78 2.79 10.32 3.33 9.12 ...
## $ PPP_2018 : num 524 3290 41793 43839 11684 ...
## $ StringencyIndex : num 27.78 33.33 0 2.78 11.11 ...
## $ Apoyo Economico : Ord.factor w/ 2 levels "Sin apoyo"<"Menos del 50% del sueldo": 1 1 1 1 1 1 1 1 1 1 ...
## $ Densidad (2018) : num 56.9 24.7 163.8 135.6 16.3 ...
## $ Desempleo (% al 2019) : num 24 7 4 2 8 6 6 5 7 1 ...
## $ Contagd100 : num 0.043397 0.000814 1.104457 0.166214 0.057828 ...
## $ Regulatory_quality : num -1.121 -0.894 1.228 0.979 -0.493 ...
## $ control_co : num -1.4011 -1.0547 1.2344 1.1063 -0.0711 ...
## $ Ruleoflaw : num -1.714 -1.054 1.58 0.84 -0.431 ...
## $ Voice_acco : num -0.988 -0.777 1.139 -1.122 0.6 ...
## $ Political_sta : num -2.649 -0.311 1.615 0.703 -0.12 ...
## $ pobla : num 38041754 31825295 77142 9770529 44938712 ...
## $ promedio : num 3.18e-05 3.59e-05 1.20e-01 8.19e-05 5.62e-04 ...
## $ GEE : num -1.457 -1.052 1.945 1.431 0.026 ...
## $ Gobernanza : num 0.101 1.694 8.687 6.921 3.725 ...
names(data_regre)=c("IDH","EXPECTATIVAVIDA", "EXPECTCOLE", "añosEscol","RentaNacional", "Campañas informativas", "PoblacionUrbana", "GastoenSalud", "PBI per cápita (2018)", "Indice de Rigurosidad", "Apoyo Economico", "Densidad", "Desempleo (% al 2019)","Contagiados", "Regulatory quality", "control of corruption", "rule of law", "voice_acco", "political stability", "pobla", "promedio", "Indice de efectividad de la gobernanza","gobernanza")
MINARIS=formula(Contagiados~data_regre$EXPECTCOLE+data_regre$Densidad+data_regre$voice_acco+data_regre$RentaNacional +data_regre$PoblacionUrbana)
MINARISA=lm(MINARIS,data=data_regre)
summary(MINARISA)
##
## Call:
## lm(formula = MINARIS, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66663 -0.07317 -0.01813 0.04735 0.96256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.378e-01 1.110e-01 2.143 0.034219 *
## data_regre$EXPECTCOLE -3.578e-02 9.477e-03 -3.776 0.000252 ***
## data_regre$Densidad 7.437e-05 7.216e-05 1.031 0.304850
## data_regre$voice_acco -3.590e-02 2.410e-02 -1.490 0.138916
## data_regre$RentaNacional 1.232e-05 1.294e-06 9.522 2.93e-16 ***
## data_regre$PoblacionUrbana 2.408e-03 1.144e-03 2.105 0.037436 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1888 on 117 degrees of freedom
## Multiple R-squared: 0.5764, Adjusted R-squared: 0.5583
## F-statistic: 31.84 on 5 and 117 DF, p-value: < 2.2e-16
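As a side note, the data_regre$ prefixes inside the formula work but clutter the coefficient names; an equivalent fit (MINARISA_limpia is a hypothetical restatement, the coefficients are unchanged) resolves the variables through the data argument:
# hypothetical restatement of the same model with plain variable names
MINARISA_limpia <- lm(Contagiados ~ EXPECTCOLE + Densidad + voice_acco +
                        RentaNacional + PoblacionUrbana,
                      data = data_regre)
summary(MINARISA_limpia)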
ewe=formula(Contagiados~data_regre$gobernanza+data_regre$Densidad+data_regre$RentaNacional+data_regre$EXPECTCOLE)
uwu=lm(ewe,data=data_regre)
summary(uwu)
##
## Call:
## lm(formula = ewe, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63850 -0.07487 -0.01371 0.03872 0.87638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.737e-01 9.645e-02 2.838 0.00534 **
## data_regre$gobernanza -3.812e-02 1.236e-02 -3.083 0.00255 **
## data_regre$Densidad 7.892e-05 7.051e-05 1.119 0.26527
## data_regre$RentaNacional 1.574e-05 1.381e-06 11.393 < 2e-16 ***
## data_regre$EXPECTCOLE -1.972e-02 9.072e-03 -2.173 0.03177 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1862 on 118 degrees of freedom
## Multiple R-squared: 0.5843, Adjusted R-squared: 0.5702
## F-statistic: 41.46 on 4 and 118 DF, p-value: < 2.2e-16
MINARISE=formula(Contagiados~data_regre$gobernanza+data_regre$PoblacionUrbana+data_regre$RentaNacional +data_regre$EXPECTCOLE)
MINARISEM=lm(MINARISE,data=data_regre)
summary(MINARISEM)
##
## Call:
## lm(formula = MINARISE, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65006 -0.07640 -0.02578 0.04720 0.88223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.758e-01 9.503e-02 2.902 0.00442 **
## data_regre$gobernanza -3.451e-02 1.249e-02 -2.762 0.00666 **
## data_regre$PoblacionUrbana 1.886e-03 1.137e-03 1.659 0.09970 .
## data_regre$RentaNacional 1.478e-05 1.525e-06 9.689 < 2e-16 ***
## data_regre$EXPECTCOLE -2.733e-02 9.939e-03 -2.749 0.00691 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1851 on 118 degrees of freedom
## Multiple R-squared: 0.5894, Adjusted R-squared: 0.5755
## F-statistic: 42.35 on 4 and 118 DF, p-value: < 2.2e-16
efe = formula(Contagiados ~ data_regre$gobernanza)
afa = lm(efe, data = data_regre)
summary(afa)
##
## Call:
## lm(formula = efe, data = data_regre)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29868 -0.11923 -0.05979 0.01058 2.22610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.020026 0.050827 -0.394 0.694272
## data_regre$gobernanza 0.035575 0.009484 3.751 0.000272 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2699 on 121 degrees of freedom
## Multiple R-squared: 0.1042, Adjusted R-squared: 0.09676
## F-statistic: 14.07 on 1 and 121 DF, p-value: 0.0002718
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
Anovita=anova(MINARISA, uwu, MINARISEM)
stargazer(Anovita,type = 'text',summary = F,title = "Table de Análisis de Varianza")
##
## Table de Análisis de Varianza
## =====================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -------------------------------------
## 1 117 4.169
## 2 118 4.092 -1 0.077
## 3 118 4.041 0 0.051
## -------------------------------------
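The F and p-value columns are blank because anova() yields a valid F test only for nested models, and these three specifications use different regressor sets (uwu and MINARISEM even share the same residual degrees of freedom). A sketch of a comparison that also works for non-nested models, using information criteria (lower is better):
# non-nested model comparison via information criteria
AIC(MINARISA, uwu, MINARISEM)
BIC(MINARISA, uwu, MINARISEM)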
stargazer(uwu, MINARISA, afa, MINARISEM, type='text')
##
## ===================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------------
## Contagiados
## (1) (2) (3) (4)
## -------------------------------------------------------------------------------------------------------------------
## gobernanza -0.038*** 0.036*** -0.035***
## (0.012) (0.009) (0.012)
##
## Densidad 0.0001 0.0001
## (0.0001) (0.0001)
##
## voice_acco -0.036
## (0.024)
##
## RentaNacional 0.00002*** 0.00001*** 0.00001***
## (0.00000) (0.00000) (0.00000)
##
## PoblacionUrbana 0.002** 0.002*
## (0.001) (0.001)
##
## EXPECTCOLE -0.020** -0.036*** -0.027***
## (0.009) (0.009) (0.010)
##
## Constant 0.274*** 0.238** -0.020 0.276***
## (0.096) (0.111) (0.051) (0.095)
##
## -------------------------------------------------------------------------------------------------------------------
## Observations 123 123 123 123
## R2 0.584 0.576 0.104 0.589
## Adjusted R2 0.570 0.558 0.097 0.576
## Residual Std. Error 0.186 (df = 118) 0.189 (df = 117) 0.270 (df = 121) 0.185 (df = 118)
## F Statistic 41.456*** (df = 4; 118) 31.845*** (df = 5; 117) 14.070*** (df = 1; 121) 42.349*** (df = 4; 118)
## ===================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
el_elegido = MINARISEM  # chosen model: highest adjusted R-squared in the comparison above
library(ggpubr)  # plots for checking normality
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(scatterplot3d)
library(stargazer)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# LINEARITY
plot(el_elegido, 1, main = c("Gráfico 2: Linealidad"))  # residuals vs. fitted: roughly flat, close to linear
B. Homoscedasticity.
plot(el_elegido, 3, main = c("Gráfico 3: Homocedasticidad"))  # scale-location plot: look for a flat trend
bptest(el_elegido)  # a p-value above 0.05 would indicate homoscedasticity
##
## studentized Breusch-Pagan test
##
## data: el_elegido
## BP = 55.086, df = 4, p-value = 3.117e-11
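The Breusch-Pagan test rejects homoscedasticity (p is far below 0.05), so the conventional standard errors above are not reliable. A common remedy, sketched here under the assumption that the sandwich package is available (it is not loaded in this document), is to report heteroscedasticity-consistent standard errors via coeftest() from lmtest:
# sketch: HC3 robust standard errors for the selected model (requires the sandwich package)
library(sandwich)
coeftest(el_elegido, vcov = vcovHC(el_elegido, type = "HC3"))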
C. Normality of residuals: the points should lie close to the diagonal.
plot(el_elegido, 2, main = c("Gráfico 4: Normalidad de residuos"))  # points drift away from the diagonal
shapiro.test(el_elegido$residuals)  # a p-value below 0.05 indicates the residuals are not normally distributed
##
## Shapiro-Wilk normality test
##
## data: el_elegido$residuals
## W = 0.82612, p-value = 9.628e-11
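The Shapiro-Wilk test also rejects normality, which is unsurprising given the strong right skew of Contagiados. One illustrative check, not part of the original analysis, is to refit the chosen specification on a log scale and test the residuals again (modelo_log is a hypothetical object name):
# illustrative refit on a log scale; log1p() keeps rates at or near zero well defined
modelo_log <- lm(log1p(Contagiados) ~ gobernanza + PoblacionUrbana +
                   RentaNacional + EXPECTCOLE, data = data_regre)
shapiro.test(residuals(modelo_log))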
VIF(el_elegido)  # all values are below 5, so multicollinearity is not a serious concern
## data_regre$gobernanza data_regre$PoblacionUrbana
## 3.691249 2.228003
## data_regre$RentaNacional data_regre$EXPECTCOLE
## 3.100896 3.052885
5.2 Checking for influential observations. Pay attention to Cook's distance.
plot(el_elegido, 5, main = c("Gráfico 5: Identificación de valores influyentes"))
checkMINARISA=as.data.frame(influence.measures(el_elegido)$is.inf)
checkMINARISA[checkMINARISA$cook.d | checkMINARISA$hat,]  # rows 120 and 124 are flagged
#data_regre
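If the flagged observations (rows 120 and 124 in the comment above) were driving the fit, a quick robustness check is to refit the chosen specification without them. A sketch, restating the model with plain variable names so the subset can be passed through the data argument (the object names here are hypothetical):
# sketch: refit the selected model excluding the influential rows
filas_influyentes <- which(checkMINARISA$cook.d | checkMINARISA$hat)
modelo_sin_influyentes <- lm(Contagiados ~ gobernanza + PoblacionUrbana +
                               RentaNacional + EXPECTCOLE,
                             data = data_regre[-filas_influyentes, ])
summary(modelo_sin_influyentes)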