Los datos que se van a analizar en este documento, proceden de la compilación hecha por usuarios de Kaggle. La fecha del análisis empieza el 30 de abril de 2020, utilizando la versión 83 recopilada en la web anterior
Aqui se pueden combinar R y Python
#import pandas as pd
#datos = pd.read_csv("covid_19_clean_complete.csv")
#datos.head(10)#
#pd <- import("pandas")
#datos <- pd$read_csv("covid_19_clean_complete.csv")
#kable(head(datos))
Vamos a hacerlo con el metodo tradicional
# Absolute location of the CSV file; print its first five raw lines.
path <- "~/Desktop/DATA ANALYST, SCIENCE y CERTIFICADOS/DS4B VIRTUAL CAMP + ESTADÍSTICA + CODING/Predicción y cuadro de mando CORONAVIRUS/CURSO UDEMY EN R CORONAVIRUS/COVID19/covid_19_clean_complete.csv"
read_lines(file = path, n_max = 5)
## [1] "Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered"
## [2] ",Afghanistan,33.0,65.0,1/22/20,0,0,0"
## [3] ",Albania,41.1533,20.1683,1/22/20,0,0,0"
## [4] ",Algeria,28.0339,1.6596,1/22/20,0,0,0"
## [5] ",Andorra,42.5063,1.5218,1/22/20,0,0,0"
# Load the dataset from the working directory.
# stringsAsFactors is set explicitly: its default changed from TRUE to FALSE
# in R 4.0.0, and the output shown in this document (text columns loaded as
# factors) corresponds to the TRUE behaviour.
datos <- read.csv("covid_19_clean_complete.csv", stringsAsFactors = TRUE)
# Compact overview: one line per column with its type and first values.
glimpse(datos)
## Observations: 25,938
## Variables: 8
## $ Province.State <fct> , , , , , , , , Australian Capital Territory, New Sout…
## $ Country.Region <fct> Afghanistan, Albania, Algeria, Andorra, Angola, Antigu…
## $ Lat <dbl> 33.0000, 41.1533, 28.0339, 42.5063, -11.2027, 17.0608,…
## $ Long <dbl> 65.0000, 20.1683, 1.6596, 1.5218, 17.8739, -61.7964, -…
## $ Date <fct> 1/22/20, 1/22/20, 1/22/20, 1/22/20, 1/22/20, 1/22/20, …
## $ Confirmed <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Deaths <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Recovered <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Open the data in the viewer, then render its first ten rows as a table.
view(datos)
kable(head(datos, n = 10))
| Province.State | Country.Region | Lat | Long | Date | Confirmed | Deaths | Recovered |
|---|---|---|---|---|---|---|---|
| Afghanistan | 33.0000 | 65.0000 | 1/22/20 | 0 | 0 | 0 | |
| Albania | 41.1533 | 20.1683 | 1/22/20 | 0 | 0 | 0 | |
| Algeria | 28.0339 | 1.6596 | 1/22/20 | 0 | 0 | 0 | |
| Andorra | 42.5063 | 1.5218 | 1/22/20 | 0 | 0 | 0 | |
| Angola | -11.2027 | 17.8739 | 1/22/20 | 0 | 0 | 0 | |
| Antigua and Barbuda | 17.0608 | -61.7964 | 1/22/20 | 0 | 0 | 0 | |
| Argentina | -38.4161 | -63.6167 | 1/22/20 | 0 | 0 | 0 | |
| Armenia | 40.0691 | 45.0382 | 1/22/20 | 0 | 0 | 0 | |
| Australian Capital Territory | Australia | -35.4735 | 149.0124 | 1/22/20 | 0 | 0 | 0 |
| New South Wales | Australia | -33.8688 | 151.2093 | 1/22/20 | 0 | 0 | 0 |
# Print the internal structure (column classes and sample values) of `datos`.
str(object = datos)
## 'data.frame': 25938 obs. of 8 variables:
## $ Province.State: Factor w/ 81 levels "","Alberta","Anguilla",..: 1 1 1 1 1 1 1 1 6 49 ...
## $ Country.Region: Factor w/ 185 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 9 ...
## $ Lat : num 33 41.2 28 42.5 -11.2 ...
## $ Long : num 65 20.17 1.66 1.52 17.87 ...
## $ Date : Factor w/ 99 levels "1/22/20","1/23/20",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Confirmed : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Deaths : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Recovered : int 0 0 0 0 0 0 0 0 0 0 ...
# Replace the eight original column names with Spanish ones (same order as
# the columns in the file), then show the first rows as a styled table.
colnames(datos) <- c(
  "Provincia_Estado",
  "Pais_Region",
  "Latitud",
  "Longitud",
  "Fecha",
  "Casos_Confirmados",
  "Casos_Muertos",
  "Casos_Recuperados"
)
kable_styling(kable(head(datos)))
| Provincia_Estado | Pais_Region | Latitud | Longitud | Fecha | Casos_Confirmados | Casos_Muertos | Casos_Recuperados |
|---|---|---|---|---|---|---|---|
| Afghanistan | 33.0000 | 65.0000 | 1/22/20 | 0 | 0 | 0 | |
| Albania | 41.1533 | 20.1683 | 1/22/20 | 0 | 0 | 0 | |
| Algeria | 28.0339 | 1.6596 | 1/22/20 | 0 | 0 | 0 | |
| Andorra | 42.5063 | 1.5218 | 1/22/20 | 0 | 0 | 0 | |
| Angola | -11.2027 | 17.8739 | 1/22/20 | 0 | 0 | 0 | |
| Antigua and Barbuda | 17.0608 | -61.7964 | 1/22/20 | 0 | 0 | 0 |
Conversiones de tipo: `factor` o `as.factor`, `ordered`, `as.numeric`. **En este caso todos los datos se han cargado con el tipo correcto porque hemos utilizado `read.csv`, el cual (en versiones de R anteriores a la 4.0) tiene por defecto el parámetro `stringsAsFactors` como TRUE; esto hace que todos los campos que vienen con formato de texto o entre comillas se carguen directamente como factores. Los únicos casos donde esto no nos interesa son aquellos en los que el string identifica al objeto; por ejemplo, un número de matrícula.**
Si ponemos `stringsAsFactors = FALSE`, esos campos se cargarán como character, por lo que podríamos convertirlos a factor de la siguiente manera:
#datos$Provincia_Estado = as.factor(datos$Provincia_Estado)
#datos$Pais_Region = as.factor(datos$Pais_Region)
#str(datos)
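Con el doble pipe (`%<>%`, el operador de asignación compuesta de magrittr) se puede hacer la misma acción que arriba, pero más fácil: envía el objeto de la izquierda a la función de la derecha y guarda el resultado de vuelta en ese mismo objeto.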
# Convert the two text columns to factors and parse the month/day/year date
# strings, writing the result back into `datos` with the compound pipe.
datos %<>%
  mutate(
    Provincia_Estado = as.factor(Provincia_Estado),
    Pais_Region = as.factor(Pais_Region),
    # Base-R alternative: as.Date(Fecha, format = "%m/%d/%y")
    # (use "%m/%d/%y %H:%M:%S" when the values also carry a time).
    Fecha = mdy(Fecha)
  )
str(datos)
## 'data.frame': 25938 obs. of 8 variables:
## $ Provincia_Estado : Factor w/ 81 levels "","Alberta","Anguilla",..: 1 1 1 1 1 1 1 1 6 49 ...
## $ Pais_Region : Factor w/ 185 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 9 ...
## $ Latitud : num 33 41.2 28 42.5 -11.2 ...
## $ Longitud : num 65 20.17 1.66 1.52 17.87 ...
## $ Fecha : Date, format: "2020-01-22" "2020-01-22" ...
## $ Casos_Confirmados: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Casos_Muertos : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Casos_Recuperados: int 0 0 0 0 0 0 0 0 0 0 ...
Podemos hacer operaciones con las variables de tipo fecha gracias a la librería lubridate.
# Parse two day/month/year strings into dates with lubridate.
d1 <- dmy("21/01/20")
d2 <- dmy("28/04/20")
# Subtracting two dates gives the elapsed time in days.
d2 - d1
## Time difference of 98 days
# Check that the subtraction result is a difftime object.
is.difftime(d2 - d1)
## [1] TRUE
# Pass the difference to lubridate's days().
days(d2 - d1)
## [1] "98d 0H 0M 0S"
\[ CasosConfirmados = Muertos + Recuperados + Enfermos \] Vamos a crear una nueva columna de Casos enfermos y despues ver si tenemos datos anomalos
# Derive the number of active cases as confirmed - deaths - recovered and
# store it in `datos` as the new column Casos_Enfermos.
datos <- datos %>%
  mutate(Casos_Enfermos = Casos_Confirmados - Casos_Muertos - Casos_Recuperados)

# Among the rows with more than 10,000 confirmed cases, show the last six.
datos %>%
  filter(Casos_Confirmados > 10000) %>%
  tail(n = 6) %>%
  kable() %>%
  kable_styling()
| Provincia_Estado | Pais_Region | Latitud | Longitud | Fecha | Casos_Confirmados | Casos_Muertos | Casos_Recuperados | Casos_Enfermos | |
|---|---|---|---|---|---|---|---|---|---|
| 868 | Sweden | 63.0000 | 16.0000 | 2020-04-29 | 20302 | 2462 | 1005 | 16835 | |
| 869 | Switzerland | 46.8182 | 8.2275 | 2020-04-29 | 29407 | 1716 | 22600 | 5091 | |
| 870 | Turkey | 38.9637 | 35.2433 | 2020-04-29 | 117589 | 3081 | 44040 | 70468 | |
| 871 | United Arab Emirates | 24.0000 | 54.0000 | 2020-04-29 | 11929 | 98 | 2329 | 9502 | |
| 872 | United Kingdom | 55.3781 | -3.4360 | 2020-04-29 | 165221 | 26097 | 0 | 139124 | |
| 873 | US | 37.0902 | -95.7129 | 2020-04-29 | 1039909 | 60967 | 120720 | 858222 |
# Look for anomalous records: a negative Casos_Enfermos means deaths plus
# recoveries exceed the confirmed total. Sorted by province, then by date.
kable_styling(
  kable(
    arrange(
      filter(datos, Casos_Enfermos < 0),
      Provincia_Estado,
      Fecha
    )
  )
)
| Provincia_Estado | Pais_Region | Latitud | Longitud | Fecha | Casos_Confirmados | Casos_Muertos | Casos_Recuperados | Casos_Enfermos |
|---|---|---|---|---|---|---|---|---|
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-22 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-23 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-24 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-25 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-26 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-27 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-28 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-29 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-30 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-03-31 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-01 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-02 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-03 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-04 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-05 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-06 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-07 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-08 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-09 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-10 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-11 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-12 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-13 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-14 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-15 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-16 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-17 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-18 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-19 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-20 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-21 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-22 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-23 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-24 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-25 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-26 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-27 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-28 | 0 | 1 | 0 | -1 |
| Diamond Princess | Canada | 0.0000 | 0.0000 | 2020-04-29 | 0 | 1 | 0 | -1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-24 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-25 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-26 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-27 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-28 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-29 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-30 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-31 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-01 | 168 | 6 | 168 | -6 |
# Show the complete series for Hainan to inspect the anomalous rows in context.
kable_styling(
  kable(
    filter(datos, Provincia_Estado == "Hainan")
  )
)
| Provincia_Estado | Pais_Region | Latitud | Longitud | Fecha | Casos_Confirmados | Casos_Muertos | Casos_Recuperados | Casos_Enfermos |
|---|---|---|---|---|---|---|---|---|
| Hainan | China | 19.1959 | 109.7453 | 2020-01-22 | 4 | 0 | 0 | 4 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-23 | 5 | 0 | 0 | 5 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-24 | 8 | 0 | 0 | 8 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-25 | 19 | 0 | 0 | 19 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-26 | 22 | 0 | 0 | 22 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-27 | 33 | 1 | 0 | 32 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-28 | 40 | 1 | 0 | 39 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-29 | 43 | 1 | 0 | 42 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-30 | 46 | 1 | 1 | 44 |
| Hainan | China | 19.1959 | 109.7453 | 2020-01-31 | 52 | 1 | 1 | 50 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-01 | 62 | 1 | 1 | 60 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-02 | 64 | 1 | 4 | 59 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-03 | 72 | 1 | 4 | 67 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-04 | 80 | 1 | 5 | 74 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-05 | 99 | 1 | 5 | 93 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-06 | 106 | 1 | 8 | 97 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-07 | 117 | 2 | 10 | 105 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-08 | 124 | 2 | 14 | 108 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-09 | 131 | 3 | 19 | 109 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-10 | 138 | 3 | 19 | 116 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-11 | 144 | 3 | 20 | 121 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-12 | 157 | 4 | 27 | 126 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-13 | 157 | 4 | 30 | 123 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-14 | 159 | 4 | 43 | 112 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-15 | 162 | 4 | 39 | 119 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-16 | 162 | 4 | 52 | 106 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-17 | 163 | 4 | 59 | 100 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-18 | 163 | 4 | 79 | 80 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-19 | 168 | 4 | 84 | 80 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-20 | 168 | 4 | 86 | 78 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-21 | 168 | 4 | 95 | 69 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-22 | 168 | 4 | 104 | 60 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-23 | 168 | 5 | 106 | 57 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-24 | 168 | 5 | 116 | 47 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-25 | 168 | 5 | 124 | 39 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-26 | 168 | 5 | 129 | 34 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-27 | 168 | 5 | 131 | 32 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-28 | 168 | 5 | 133 | 30 |
| Hainan | China | 19.1959 | 109.7453 | 2020-02-29 | 168 | 5 | 148 | 15 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-01 | 168 | 5 | 149 | 14 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-02 | 168 | 5 | 151 | 12 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-03 | 168 | 5 | 155 | 8 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-04 | 168 | 5 | 158 | 5 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-05 | 168 | 6 | 158 | 4 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-06 | 168 | 6 | 158 | 4 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-07 | 168 | 6 | 158 | 4 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-08 | 168 | 6 | 159 | 3 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-09 | 168 | 6 | 159 | 3 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-10 | 168 | 6 | 159 | 3 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-11 | 168 | 6 | 159 | 3 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-12 | 168 | 6 | 160 | 2 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-13 | 168 | 6 | 160 | 2 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-14 | 168 | 6 | 160 | 2 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-15 | 168 | 6 | 160 | 2 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-16 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-17 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-18 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-19 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-20 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-21 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-22 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-23 | 168 | 6 | 161 | 1 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-24 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-25 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-26 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-27 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-28 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-29 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-30 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-03-31 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-01 | 168 | 6 | 168 | -6 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-02 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-03 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-04 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-05 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-06 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-07 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-08 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-09 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-10 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-11 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-12 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-13 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-14 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-15 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-16 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-17 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-18 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-19 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-20 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-21 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-22 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-23 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-24 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-25 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-26 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-27 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-28 | 168 | 6 | 162 | 0 |
| Hainan | China | 19.1959 | 109.7453 | 2020-04-29 | 168 | 6 | 162 | 0 |
# Correct the Hainan rows whose active-case count is negative: the surplus is
# taken off the recovered count (Casos_Enfermos is negative here, so adding
# it lowers Casos_Recuperados) and the active cases are set to zero.
# `=` assigns the column; the previous `==` only appended a logical
# comparison column and left Casos_Recuperados untouched.
# NOTE: the corrected rows are only printed; `datos` itself is not modified.
datos %>%
  filter(Provincia_Estado == "Hainan", Casos_Enfermos < 0) %>%
  mutate(
    Casos_Recuperados = Casos_Recuperados + Casos_Enfermos,
    Casos_Enfermos = 0
  )
## Provincia_Estado Pais_Region Latitud Longitud Fecha Casos_Confirmados
## 1 Hainan China 19.1959 109.7453 2020-03-24 168
## 2 Hainan China 19.1959 109.7453 2020-03-25 168
## 3 Hainan China 19.1959 109.7453 2020-03-26 168
## 4 Hainan China 19.1959 109.7453 2020-03-27 168
## 5 Hainan China 19.1959 109.7453 2020-03-28 168
## 6 Hainan China 19.1959 109.7453 2020-03-29 168
## 7 Hainan China 19.1959 109.7453 2020-03-30 168
## 8 Hainan China 19.1959 109.7453 2020-03-31 168
## 9 Hainan China 19.1959 109.7453 2020-04-01 168
## Casos_Muertos Casos_Recuperados Casos_Enfermos
## 1 6 168 0
## 2 6 168 0
## 3 6 168 0
## 4 6 168 0
## 5 6 168 0
## 6 6 168 0
## 7 6 168 0
## 8 6 168 0
## 9 6 168 0
## Casos_Recuperados == Casos_Recuperados + Casos_Enfermos
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
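Como ya hemos descubierto que los casos negativos de Hainan se deben a un error, los corregimos en lugar de borrarlos. Nota: el resultado anterior solo se muestra por pantalla; para conservar la corrección habría que guardarla en `datos`.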
Como yo quiero todas las filas que pertenecen a Europa, tengo que filtrar en la primera posición de `[filas, columnas]`, ya que el segundo índice corresponde a las columnas.
# Keep the rows inside the latitude/longitude box used here for Europe:
# latitude above 38 and longitude within [-25, 30] (both bounds included).
# Base-R version of the same idea, with strict longitude bounds:
#   datos[datos$Latitud > 38 & datos$Longitud > -25 & datos$Longitud < 30, ]
datos_europa <- filter(
  datos,
  Latitud > 38,
  Longitud >= -25,
  Longitud <= 30
)
nrow(datos_europa)
## [1] 4455
# Count the rows per country in the European subset. The table is converted
# to a data frame so that filter() can be applied, and countries with zero
# rows are dropped before rendering.
as.data.frame(table(datos_europa$Pais_Region)) %>%
  filter(Freq > 0) %>%
  kable() %>%
  kable_styling()
| Var1 | Freq |
|---|---|
| Albania | 99 |
| Andorra | 99 |
| Austria | 99 |
| Belarus | 99 |
| Belgium | 99 |
| Bosnia and Herzegovina | 99 |
| Bulgaria | 99 |
| Croatia | 99 |
| Czechia | 99 |
| Denmark | 198 |
| Estonia | 99 |
| Finland | 99 |
| France | 99 |
| Germany | 99 |
| Greece | 99 |
| Holy See | 99 |
| Hungary | 99 |
| Iceland | 99 |
| Ireland | 99 |
| Italy | 99 |
| Kosovo | 99 |
| Latvia | 99 |
| Liechtenstein | 99 |
| Lithuania | 99 |
| Luxembourg | 99 |
| Moldova | 99 |
| Monaco | 99 |
| Montenegro | 99 |
| Netherlands | 99 |
| North Macedonia | 99 |
| Norway | 99 |
| Poland | 99 |
| Portugal | 99 |
| Romania | 99 |
| San Marino | 99 |
| Serbia | 99 |
| Slovakia | 99 |
| Slovenia | 99 |
| Spain | 99 |
| Sweden | 99 |
| Switzerland | 99 |
| United Kingdom | 297 |
# List every record of the European subset dated 15 March 2020.
kable_styling(
  kable(
    filter(datos_europa, Fecha == ymd("2020-03-15"))
  )
)
| Provincia_Estado | Pais_Region | Latitud | Longitud | Fecha | Casos_Confirmados | Casos_Muertos | Casos_Recuperados | Casos_Enfermos |
|---|---|---|---|---|---|---|---|---|
| Albania | 41.15330 | 20.16830 | 2020-03-15 | 42 | 1 | 0 | 41 | |
| Andorra | 42.50630 | 1.52180 | 2020-03-15 | 1 | 0 | 1 | 0 | |
| Austria | 47.51620 | 14.55010 | 2020-03-15 | 860 | 1 | 6 | 853 | |
| Belarus | 53.70980 | 27.95340 | 2020-03-15 | 27 | 0 | 3 | 24 | |
| Belgium | 50.83330 | 4.00000 | 2020-03-15 | 886 | 4 | 1 | 881 | |
| Bosnia and Herzegovina | 43.91590 | 17.67910 | 2020-03-15 | 24 | 0 | 0 | 24 | |
| Bulgaria | 42.73390 | 25.48580 | 2020-03-15 | 51 | 2 | 0 | 49 | |
| Croatia | 45.10000 | 15.20000 | 2020-03-15 | 49 | 0 | 1 | 48 | |
| Czechia | 49.81750 | 15.47300 | 2020-03-15 | 253 | 0 | 0 | 253 | |
| Faroe Islands | Denmark | 61.89260 | -6.91180 | 2020-03-15 | 11 | 0 | 0 | 11 |
| Denmark | 56.26390 | 9.50180 | 2020-03-15 | 864 | 2 | 1 | 861 | |
| Estonia | 58.59530 | 25.01360 | 2020-03-15 | 171 | 0 | 1 | 170 | |
| Finland | 64.00000 | 26.00000 | 2020-03-15 | 244 | 0 | 10 | 234 | |
| France | 46.22760 | 2.21370 | 2020-03-15 | 4499 | 91 | 12 | 4396 | |
| Germany | 51.00000 | 9.00000 | 2020-03-15 | 5795 | 11 | 46 | 5738 | |
| Greece | 39.07420 | 21.82430 | 2020-03-15 | 331 | 4 | 8 | 319 | |
| Holy See | 41.90290 | 12.45340 | 2020-03-15 | 1 | 0 | 0 | 1 | |
| Hungary | 47.16250 | 19.50330 | 2020-03-15 | 32 | 1 | 1 | 30 | |
| Iceland | 64.96310 | -19.02080 | 2020-03-15 | 171 | 5 | 8 | 158 | |
| Ireland | 53.14240 | -7.69210 | 2020-03-15 | 129 | 2 | 0 | 127 | |
| Italy | 43.00000 | 12.00000 | 2020-03-15 | 24747 | 1809 | 2335 | 20603 | |
| Latvia | 56.87960 | 24.60320 | 2020-03-15 | 30 | 0 | 1 | 29 | |
| Liechtenstein | 47.14000 | 9.55000 | 2020-03-15 | 4 | 0 | 0 | 4 | |
| Lithuania | 55.16940 | 23.88130 | 2020-03-15 | 12 | 0 | 1 | 11 | |
| Luxembourg | 49.81530 | 6.12960 | 2020-03-15 | 59 | 1 | 0 | 58 | |
| Moldova | 47.41160 | 28.36990 | 2020-03-15 | 23 | 0 | 0 | 23 | |
| Monaco | 43.73330 | 7.41670 | 2020-03-15 | 2 | 0 | 0 | 2 | |
| Montenegro | 42.50000 | 19.30000 | 2020-03-15 | 0 | 0 | 0 | 0 | |
| Netherlands | 52.13260 | 5.29130 | 2020-03-15 | 1135 | 20 | 2 | 1113 | |
| North Macedonia | 41.60860 | 21.74530 | 2020-03-15 | 14 | 0 | 1 | 13 | |
| Norway | 60.47200 | 8.46890 | 2020-03-15 | 1221 | 3 | 1 | 1217 | |
| Poland | 51.91940 | 19.14510 | 2020-03-15 | 119 | 3 | 0 | 116 | |
| Portugal | 39.39990 | -8.22450 | 2020-03-15 | 245 | 0 | 2 | 243 | |
| Romania | 45.94320 | 24.96680 | 2020-03-15 | 131 | 0 | 9 | 122 | |
| San Marino | 43.94240 | 12.45780 | 2020-03-15 | 101 | 5 | 4 | 92 | |
| Serbia | 44.01650 | 21.00590 | 2020-03-15 | 48 | 0 | 0 | 48 | |
| Slovakia | 48.66900 | 19.69900 | 2020-03-15 | 54 | 0 | 0 | 54 | |
| Slovenia | 46.15120 | 14.99550 | 2020-03-15 | 219 | 1 | 0 | 218 | |
| Spain | 40.00000 | -4.00000 | 2020-03-15 | 7798 | 289 | 517 | 6992 | |
| Sweden | 63.00000 | 16.00000 | 2020-03-15 | 1022 | 3 | 1 | 1018 | |
| Switzerland | 46.81820 | 8.22750 | 2020-03-15 | 2200 | 14 | 4 | 2182 | |
| Channel Islands | United Kingdom | 49.37230 | -2.36440 | 2020-03-15 | 3 | 0 | 0 | 3 |
| Isle of Man | United Kingdom | 54.23610 | -4.54810 | 2020-03-15 | 0 | 0 | 0 | 0 |
| United Kingdom | 55.37810 | -3.43600 | 2020-03-15 | 1140 | 21 | 18 | 1101 | |
| Kosovo | 42.60264 | 20.90298 | 2020-03-15 | 0 | 0 | 0 | 0 |
\[ d(x, y) = \sqrt{(x_1 - y_1)^2 + (x_2 - y_2)^2} \] Aquí se define una función de distancia euclídea (en grados) para ver a qué distancia de Potsdam (`postdam` en el código) había contagiados.
# Euclidean distance between two points given as coordinate vectors.
#
# x, y: numeric vectors; only the first two elements of each are used.
# Returns the straight-line distance, in the same units as the inputs.
distancia_grados <- function(x, y) {
  delta_1 <- x[1] - y[1]
  delta_2 <- x[2] - y[2]
  sqrt(delta_1^2 + delta_2^2)
}
# Distance from a point to the fixed reference point used in this analysis.
#
# x: numeric vector whose first two elements are compared against the
#    reference coordinates c(52.366956, 13.906734).
# Returns the value computed by distancia_grados() for that pair.
distancia_grados_postdam <- function(x) {
  distancia_grados(x, c(52.366956, 13.906734))
}
# Distance from every European record to the reference point.
# cbind() puts latitude and longitude side by side as a two-column matrix;
# apply() with MARGIN = 1 hands each row to the helper function.
dist_postdam <- apply(
  X = cbind(datos_europa$Latitud, datos_europa$Longitud),
  MARGIN = 1,
  FUN = distancia_grados_postdam
)

# Attach the distances to the European subset as a new column.
datos_europa <- datos_europa %>%
  mutate(dist_postdam = dist_postdam)
# Records dated between 2 and 7 March 2020 (both included) whose distance to
# the reference point is below 4.
kable_styling(
  kable(
    filter(
      datos_europa,
      between(Fecha, dmy("2-3-2020"), dmy("7-3-2020")),
      dist_postdam < 4
    )
  )
)
| Provincia_Estado | Pais_Region | Latitud | Longitud | Fecha | Casos_Confirmados | Casos_Muertos | Casos_Recuperados | Casos_Enfermos | dist_postdam |
|---|---|---|---|---|---|---|---|---|---|
| Czechia | 49.8175 | 15.473 | 2020-03-02 | 3 | 0 | 0 | 3 | 2.992142 | |
| Czechia | 49.8175 | 15.473 | 2020-03-03 | 5 | 0 | 0 | 5 | 2.992142 | |
| Czechia | 49.8175 | 15.473 | 2020-03-04 | 8 | 0 | 0 | 8 | 2.992142 | |
| Czechia | 49.8175 | 15.473 | 2020-03-05 | 12 | 0 | 0 | 12 | 2.992142 | |
| Czechia | 49.8175 | 15.473 | 2020-03-06 | 18 | 0 | 0 | 18 | 2.992142 | |
| Czechia | 49.8175 | 15.473 | 2020-03-07 | 19 | 0 | 0 | 19 | 2.992142 |
Vamos a hacer un mapa con rnaturalearth
#world <- ne_countries(scale ="medium", returnclass = "sf")
#datos$Pais_Region = factor(datos$Pais_Region, levels = c(levels(datos$Pais_Region), "United States"))
#datos[datos$Pais_Region =="US",]$Pais_Region = "United States"
#world %>%
# inner_join(datos, by = c("name" = "Pais_Region")) %>%
#filter(Fecha == dmy("30-03-2020")) %>% # Aqui cruzamos la tabla world con la de datos para que coincidan el name de la tabla world con el nombre de mi tabla datos
#ggplot() +
#geom_sf(color = "black", aes(fill = Casos_Confirmados)) +
#coord_sf(crs = "+proj=laea +lat_0=50 +lan_0=10[ +units=m +ellps=GRS80") +
#scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
#xlab("Longitud") + ylab("Latitud") +
#ggtitle("Mapa del mundo", subtitle = "COVID19")
Si lo que quiero es representar un mapa directamente con los datos que yo tengo y de forma minimalista, procedemos de la siguiente manera:
# Minimalist map drawn straight from the data for 30 March 2020: one point
# per record, sized by confirmed cases and coloured by deaths.
# coord_fixed() keeps a fixed aspect ratio between the two axes.
# To get larger points, the values can be log-transformed instead:
#   size = log(Casos_Confirmados + 1), color = log(Casos_Muertos + 1)
datos %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot(mapping = aes(x = Longitud, y = Latitud)) +
  geom_point(
    mapping = aes(size = Casos_Confirmados, color = Casos_Muertos)
  ) +
  coord_fixed() +
  theme(legend.position = "bottom")
MAPAS INTERACTIVOS con GGPLOTLY
# Country polygons, returned as an sf object.
world <- ne_countries(scale = "medium", returnclass = "sf")

# Add "United States" as a factor level and relabel the "US" rows with it,
# presumably to match the spelling used in `world$name` for the join below.
datos$Pais_Region <- factor(
  datos$Pais_Region,
  levels = c(levels(datos$Pais_Region), "United States")
)
datos[datos$Pais_Region == "US", ]$Pais_Region <- "United States"

# Join the polygons with the data (world$name against datos$Pais_Region),
# keep 30 March 2020 and fill each country by its confirmed cases on a
# square-root colour scale. The plot is stored in `g`.
# An optional projection was left disabled in the original, e.g. (untested):
#   coord_sf(crs = "+proj=laea +lat_0=50 +lon_0=10 +units=m +ellps=GRS80")
g <- world %>%
  inner_join(datos, by = c("name" = "Pais_Region")) %>%
  filter(Fecha == dmy("30-03-2020")) %>%
  ggplot() +
  geom_sf(color = "black", aes(fill = Casos_Confirmados)) +
  scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
  xlab("Longitud") +
  ylab("Latitud") +
  ggtitle("Mapa del mundo", subtitle = "COVID19")
## Warning: Column `name`/`Pais_Region` joining character vector and factor,
## coercing into character vector
# Convert the static ggplot stored in `g` into an interactive plotly widget.
ggplotly(p = g)