Carga y limpieza preliminar de los datos

Los datos que se van a analizar en este documento proceden de la compilación hecha por usuarios de Kaggle. La fecha del análisis inicia el 11 de septiembre de 2020.

# Load the COVID-19 compilation from disk; the file is comma-separated and
# its first row holds the column names.
datos <- read.csv(
  file = "C:/cursoanalisisdatoscovid/covid_19_clean_complete.csv"
)

# Render the first ten records as a table.
kable(head(datos, n = 10))
Province.State Country.Region Lat Long Date Confirmed Deaths Recovered
Afghanistan 33.0000 65.0000 1/22/20 0 0 0
Albania 41.1533 20.1683 1/22/20 0 0 0
Algeria 28.0339 1.6596 1/22/20 0 0 0
Andorra 42.5063 1.5218 1/22/20 0 0 0
Angola -11.2027 17.8739 1/22/20 0 0 0
Antigua and Barbuda 17.0608 -61.7964 1/22/20 0 0 0
Argentina -38.4161 -63.6167 1/22/20 0 0 0
Armenia 40.0691 45.0382 1/22/20 0 0 0
Australian Capital Territory Australia -35.4735 149.0124 1/22/20 0 0 0
New South Wales Australia -33.8688 151.2093 1/22/20 0 0 0

Estructura de los datos

# Show the dimensions and the type of every column of the loaded data frame.
str(datos)
## 'data.frame':    21484 obs. of  8 variables:
##  $ Province.State: chr  "" "" "" "" ...
##  $ Country.Region: chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ Lat           : num  33 41.2 28 42.5 -11.2 ...
##  $ Long          : num  65 20.17 1.66 1.52 17.87 ...
##  $ Date          : chr  "1/22/20" "1/22/20" "1/22/20" "1/22/20" ...
##  $ Confirmed     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Deaths        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Recovered     : int  0 0 0 0 0 0 0 0 0 0 ...
# List the original column names before they are renamed.
colnames(datos)
## [1] "Province.State" "Country.Region" "Lat"            "Long"          
## [5] "Date"           "Confirmed"      "Deaths"         "Recovered"
# Replace the column names with Spanish ones. The vector is positional, so
# its order must follow the order of the columns in the data frame.
names(datos) <- c(
  "Provincia_estado",
  "Pais_region",
  "Latitud",   # positive = north, negative = south
  "Longitud",  # positive = east, negative = west
  "Fecha",
  "Casos_Confirmados",
  "Casos_Muertos",
  "Casos_Recuperados"
)

# Styled preview of the first ten rows with the new headers.
kable_styling(kable(head(datos, 10)))
Provincia_estado Pais_region Latitud Longitud Fecha Casos_Confirmados Casos_Muertos Casos_Recuperados
Afghanistan 33.0000 65.0000 1/22/20 0 0 0
Albania 41.1533 20.1683 1/22/20 0 0 0
Algeria 28.0339 1.6596 1/22/20 0 0 0
Andorra 42.5063 1.5218 1/22/20 0 0 0
Angola -11.2027 17.8739 1/22/20 0 0 0
Antigua and Barbuda 17.0608 -61.7964 1/22/20 0 0 0
Argentina -38.4161 -63.6167 1/22/20 0 0 0
Armenia 40.0691 45.0382 1/22/20 0 0 0
Australian Capital Territory Australia -35.4735 149.0124 1/22/20 0 0 0
New South Wales Australia -33.8688 151.2093 1/22/20 0 0 0
# Store the two location columns as factors so that each distinct
# province/state and country/region becomes a level.
datos[["Provincia_estado"]] <- factor(datos[["Provincia_estado"]])
datos[["Pais_region"]] <- factor(datos[["Pais_region"]])

Procesar

# Re-inspect the structure after the two location columns became factors.
str(datos)
## 'data.frame':    21484 obs. of  8 variables:
##  $ Provincia_estado : Factor w/ 81 levels "","Alberta","Anguilla",..: 1 1 1 1 1 1 1 1 6 49 ...
##  $ Pais_region      : Factor w/ 185 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 9 ...
##  $ Latitud          : num  33 41.2 28 42.5 -11.2 ...
##  $ Longitud         : num  65 20.17 1.66 1.52 17.87 ...
##  $ Fecha            : chr  "1/22/20" "1/22/20" "1/22/20" "1/22/20" ...
##  $ Casos_Confirmados: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Casos_Muertos    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Casos_Recuperados: int  0 0 0 0 0 0 0 0 0 0 ...
# Print the first ten rows as plain console output.
head(datos,10)
##                Provincia_estado         Pais_region  Latitud Longitud   Fecha
## 1                                       Afghanistan  33.0000  65.0000 1/22/20
## 2                                           Albania  41.1533  20.1683 1/22/20
## 3                                           Algeria  28.0339   1.6596 1/22/20
## 4                                           Andorra  42.5063   1.5218 1/22/20
## 5                                            Angola -11.2027  17.8739 1/22/20
## 6                               Antigua and Barbuda  17.0608 -61.7964 1/22/20
## 7                                         Argentina -38.4161 -63.6167 1/22/20
## 8                                           Armenia  40.0691  45.0382 1/22/20
## 9  Australian Capital Territory           Australia -35.4735 149.0124 1/22/20
## 10              New South Wales           Australia -33.8688 151.2093 1/22/20
##    Casos_Confirmados Casos_Muertos Casos_Recuperados
## 1                  0             0                 0
## 2                  0             0                 0
## 3                  0             0                 0
## 4                  0             0                 0
## 5                  0             0                 0
## 6                  0             0                 0
## 7                  0             0                 0
## 8                  0             0                 0
## 9                  0             0                 0
## 10                 0             0                 0

# Verificar datos anómalos en los registros \[Casos confirmados = muertos + recuperados + enfermos\]

* Añadir una columna al set de datos

# Active cases are what remains of the confirmed total once deaths and
# recoveries are subtracted; the new column is stored back into `datos`.
datos <- mutate(
  datos,
  casos_enfermos = Casos_Confirmados - Casos_Muertos - Casos_Recuperados
)

# Show the first ten records with more than 10000 confirmed cases.
kable(head(filter(datos, Casos_Confirmados > 10000), 10))
Provincia_estado Pais_region Latitud Longitud Fecha Casos_Confirmados Casos_Muertos Casos_Recuperados casos_enfermos
Hubei China 30.9756 112.2707 2/2/20 11177 350 295 10532
Hubei China 30.9756 112.2707 2/3/20 13522 414 386 12722
Hubei China 30.9756 112.2707 2/4/20 16678 479 522 15677
Hubei China 30.9756 112.2707 2/5/20 19665 549 633 18483
Hubei China 30.9756 112.2707 2/6/20 22112 618 817 20677
Hubei China 30.9756 112.2707 2/7/20 24953 699 1115 23139
Hubei China 30.9756 112.2707 2/8/20 27100 780 1439 24881
Hubei China 30.9756 112.2707 2/9/20 29631 871 1795 26965
Hubei China 30.9756 112.2707 2/10/20 31728 974 2222 28532
Hubei China 30.9756 112.2707 2/11/20 33366 1068 2639 29659

* Revisar la consistencia de los datos

# A negative active-case count means deaths plus recoveries exceed the
# confirmed total; list every such record.
kable(filter(datos, casos_enfermos < 0))
Provincia_estado Pais_region Latitud Longitud Fecha Casos_Confirmados Casos_Muertos Casos_Recuperados casos_enfermos
Diamond Princess Canada 0.0000 0.0000 3/22/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 3/23/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/24/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/24/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/25/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/25/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/26/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/26/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/27/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/27/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/28/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/28/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/29/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/29/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/30/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/30/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/31/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/31/20 0 1 0 -1
Hainan China 19.1959 109.7453 4/1/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 4/1/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/2/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/3/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/4/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/5/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/6/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/7/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/8/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/9/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/10/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/11/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/12/20 -1 1 0 -2
# List the records whose active-case count is negative.
# NOTE(review): this is the same query as the previous listing in this
# document and yields the same table; consider dropping one of the two.
filter(datos, casos_enfermos < 0) %>%
  kable()
Provincia_estado Pais_region Latitud Longitud Fecha Casos_Confirmados Casos_Muertos Casos_Recuperados casos_enfermos
Diamond Princess Canada 0.0000 0.0000 3/22/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 3/23/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/24/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/24/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/25/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/25/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/26/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/26/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/27/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/27/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/28/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/28/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/29/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/29/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/30/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/30/20 0 1 0 -1
Hainan China 19.1959 109.7453 3/31/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 3/31/20 0 1 0 -1
Hainan China 19.1959 109.7453 4/1/20 168 6 168 -6
Diamond Princess Canada 0.0000 0.0000 4/1/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/2/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/3/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/4/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/5/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/6/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/7/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/8/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/9/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/10/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/11/20 0 1 0 -1
Diamond Princess Canada 0.0000 0.0000 4/12/20 -1 1 0 -2
# Preview the proposed correction for the Hainan rows with a negative
# active-case count: the negative remainder is added to the recovered count
# and the active cases are set to zero. The result is only displayed; `datos`
# itself is not modified here.
datos %>%
  filter(Provincia_estado == "Hainan", casos_enfermos < 0) %>%
  # Fecha holds "m/d/yy" text, so sorting the raw strings would be
  # lexicographic (e.g. "2/10/20" before "2/2/20"); sort on the parsed date.
  arrange(Provincia_estado, as.Date(Fecha, format = "%m/%d/%y")) %>%
  mutate(
    Casos_Recuperados = Casos_Recuperados + casos_enfermos,
    casos_enfermos = 0
  ) %>%
  kable()
Provincia_estado Pais_region Latitud Longitud Fecha Casos_Confirmados Casos_Muertos Casos_Recuperados casos_enfermos
Hainan China 19.1959 109.7453 3/24/20 168 6 162 0
Hainan China 19.1959 109.7453 3/25/20 168 6 162 0
Hainan China 19.1959 109.7453 3/26/20 168 6 162 0
Hainan China 19.1959 109.7453 3/27/20 168 6 162 0
Hainan China 19.1959 109.7453 3/28/20 168 6 162 0
Hainan China 19.1959 109.7453 3/29/20 168 6 162 0
Hainan China 19.1959 109.7453 3/30/20 168 6 162 0
Hainan China 19.1959 109.7453 3/31/20 168 6 162 0
Hainan China 19.1959 109.7453 4/1/20 168 6 162 0

* Corregir la inconsistencia

# Show the first ten records reported for Paraguay.
kable(head(filter(datos, Pais_region == "Paraguay"), 10))
Provincia_estado Pais_region Latitud Longitud Fecha Casos_Confirmados Casos_Muertos Casos_Recuperados casos_enfermos
Paraguay -23.4425 -58.4438 1/22/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/23/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/24/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/25/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/26/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/27/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/28/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/29/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/30/20 0 0 0 0
Paraguay -23.4425 -58.4438 1/31/20 0 0 0 0

Análisis gráfico

# Filter the European records (draft, currently disabled).
# NOTE(review): as written the logical vector sits in the single-bracket
# position, which indexes columns rather than rows; row filtering needs a
# trailing comma, i.e. datos[cond, ]. The condition also has no western
# longitude bound, so any record north of latitude 38 and west of longitude 30
# would match -- confirm the intended bounding box before enabling.
#datos_europa=datos[datos$Latitud>38 & datos$Longitud<30]