Los datos
- Cargar los datos desde una URL de GitHub
- Presenta solo los primeros y últimos seis registros
# Read the COVID-19 observations CSV directly from its raw GitHub URL.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/FundamentosMachineLearning/master/datos/covid_19_data.csv"
)
# Preview the first six records.
head(x = datos, n = 6L)
## SNo ObservationDate Province.State Country.Region Last.Update Confirmed
## 1 1 01/22/2020 Anhui Mainland China 1/22/2020 17:00 1
## 2 2 01/22/2020 Beijing Mainland China 1/22/2020 17:00 14
## 3 3 01/22/2020 Chongqing Mainland China 1/22/2020 17:00 6
## 4 4 01/22/2020 Fujian Mainland China 1/22/2020 17:00 1
## 5 5 01/22/2020 Gansu Mainland China 1/22/2020 17:00 0
## 6 6 01/22/2020 Guangdong Mainland China 1/22/2020 17:00 26
## Deaths Recovered
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
# Preview the last six records.
tail(x = datos, n = 6L)
## SNo ObservationDate Province.State Country.Region
## 2374 2374 02/25/2020 San Antonio, TX US
## 2375 2375 02/25/2020 Seattle, WA US
## 2376 2376 02/25/2020 Tempe, AZ US
## 2377 2377 02/25/2020 Lackland, TX (From Diamond Princess) US
## 2378 2378 02/25/2020 Omaha, NE (From Diamond Princess) US
## 2379 2379 02/25/2020 Travis, CA (From Diamond Princess) US
## Last.Update Confirmed Deaths Recovered
## 2374 2020-02-13T18:53:02 1 0 0
## 2375 2020-02-09T07:03:04 1 0 1
## 2376 2020-02-25T21:23:03 1 0 1
## 2377 2020-02-24T23:33:02 0 0 0
## 2378 2020-02-24T23:33:02 0 0 0
## 2379 2020-02-24T23:33:02 0 0 0
Explorando datos
# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(object = datos)
## SNo ObservationDate Province.State Country.Region
## Min. : 1.0 02/25/2020: 94 : 661 Mainland China:1079
## 1st Qu.: 595.5 02/24/2020: 90 Anhui : 35 US : 304
## Median :1190.0 02/23/2020: 85 Beijing : 35 Australia : 120
## Mean :1190.0 02/21/2020: 84 Chongqing: 35 Canada : 82
## 3rd Qu.:1784.5 02/22/2020: 84 Fujian : 35 Hong Kong : 35
## Max. :2379.0 02/19/2020: 76 Gansu : 35 Japan : 35
## (Other) :1866 (Other) :1543 (Other) : 724
## Last.Update Confirmed Deaths
## 1/31/2020 23:59 : 62 Min. : 0.0 Min. : 0.0
## 2020-02-01T19:43:03: 61 1st Qu.: 2.0 1st Qu.: 0.0
## 1/30/20 16:00 : 58 Median : 12.0 Median : 0.0
## 1/29/20 19:30 : 54 Mean : 578.5 Mean : 15.3
## 1/28/20 23:00 : 52 3rd Qu.: 122.0 3rd Qu.: 1.0
## 1/27/20 23:59 : 51 Max. :64786.0 Max. :2563.0
## (Other) :2041
## Recovered
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 1.0
## Mean : 101.5
## 3rd Qu.: 15.0
## Max. :18971.0
##
# Compact view of the table's structure: dimensions and the type of each column.
str(object = datos)
## 'data.frame': 2379 obs. of 8 variables:
## $ SNo : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ObservationDate: Factor w/ 35 levels "01/22/2020","01/23/2020",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Province.State : Factor w/ 73 levels "","Anhui","Arizona",..: 2 6 12 16 17 18 19 20 21 22 ...
## $ Country.Region : Factor w/ 46 levels "Afghanistan",..: 27 27 27 27 27 27 27 27 27 27 ...
## $ Last.Update : Factor w/ 686 levels "1/22/2020 17:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Confirmed : num 1 14 6 1 0 26 2 1 4 1 ...
## $ Deaths : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Recovered : num 0 0 0 0 0 0 0 0 0 0 ...
# Distinct countries/regions, in order of first appearance in the data.
unique(datos[["Country.Region"]])
## [1] Mainland China Hong Kong Macau
## [4] Taiwan US Japan
## [7] Thailand South Korea Singapore
## [10] Philippines Malaysia Vietnam
## [13] Australia Mexico Brazil
## [16] Colombia France Nepal
## [19] Canada Cambodia Sri Lanka
## [22] Ivory Coast Germany Finland
## [25] United Arab Emirates India Italy
## [28] UK Russia Sweden
## [31] Spain Belgium Others
## [34] Egypt Iran Israel
## [37] Lebanon Iraq Oman
## [40] Afghanistan Bahrain Kuwait
## [43] Austria Algeria Croatia
## [46] Switzerland
## 46 Levels: Afghanistan Algeria Australia Austria Bahrain Belgium ... Vietnam
# Confirm the container type of the loaded table.
class(x = datos)
## [1] "data.frame"
# Preview the first six records again.
head(x = datos, n = 6L)
## SNo ObservationDate Province.State Country.Region Last.Update Confirmed
## 1 1 01/22/2020 Anhui Mainland China 1/22/2020 17:00 1
## 2 2 01/22/2020 Beijing Mainland China 1/22/2020 17:00 14
## 3 3 01/22/2020 Chongqing Mainland China 1/22/2020 17:00 6
## 4 4 01/22/2020 Fujian Mainland China 1/22/2020 17:00 1
## 5 5 01/22/2020 Gansu Mainland China 1/22/2020 17:00 0
## 6 6 01/22/2020 Guangdong Mainland China 1/22/2020 17:00 26
## Deaths Recovered
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
# Preview the last six records again.
tail(x = datos, n = 6L)
## SNo ObservationDate Province.State Country.Region
## 2374 2374 02/25/2020 San Antonio, TX US
## 2375 2375 02/25/2020 Seattle, WA US
## 2376 2376 02/25/2020 Tempe, AZ US
## 2377 2377 02/25/2020 Lackland, TX (From Diamond Princess) US
## 2378 2378 02/25/2020 Omaha, NE (From Diamond Princess) US
## 2379 2379 02/25/2020 Travis, CA (From Diamond Princess) US
## Last.Update Confirmed Deaths Recovered
## 2374 2020-02-13T18:53:02 1 0 0
## 2375 2020-02-09T07:03:04 1 0 1
## 2376 2020-02-25T21:23:03 1 0 1
## 2377 2020-02-24T23:33:02 0 0 0
## 2378 2020-02-24T23:33:02 0 0 0
## 2379 2020-02-24T23:33:02 0 0 0
# Number of observations (rows) in the table.
nrow(x = datos)
## [1] 2379
# Number of variables (columns) in the table.
ncol(x = datos)
## [1] 8
Agrupando datos por país o región (Country.Region)
# Total confirmed cases and deaths per country/region.
# Requires dplyr to be attached (%>%, group_by, summarise, arrange, desc).
# NOTE(review): each Province.State appears once per ObservationDate (e.g.
# Anhui has 35 rows for 35 dates). If Confirmed/Deaths are running totals,
# summing across dates counts the same cases repeatedly -- confirm, and if so
# keep only the latest ObservationDate before aggregating.
casos <- datos %>%
  group_by(Country.Region) %>%
  summarise(
    casosConfirmados = sum(Confirmed),
    casosDecesos = sum(Deaths)
  )

# Sort by confirmed cases (descending) and convert back to a plain data.frame.
casos <- data.frame(arrange(casos, desc(casosConfirmados)))

# porc = deaths as a percentage of confirmed cases, formatted like "2.67 %".
# A region with zero confirmed cases would compute 0 / 0 and be displayed as
# "NaN %"; it gets NA instead, since no rate is defined for it.
casos <- cbind(
  casos,
  porc = ifelse(
    casos$casosConfirmados > 0,
    paste(round(casos$casosDecesos / casos$casosConfirmados * 100, 2), "%"),
    NA_character_
  )
)
casos
## Country.Region casosConfirmados casosDecesos porc
## 1 Mainland China 1354794 36187 2.67 %
## 2 Others 7256 15 0.21 %
## 3 South Korea 3650 29 0.79 %
## 4 Japan 1540 13 0.84 %
## 5 Singapore 1467 0 0 %
## 6 Hong Kong 1236 29 2.35 %
## 7 Thailand 863 0 0 %
## 8 Italy 844 23 2.73 %
## 9 Taiwan 531 10 1.88 %
## 10 US 504 0 0 %
## 11 Malaysia 474 0 0 %
## 12 Australia 434 0 0 %
## 13 Germany 378 0 0 %
## 14 Vietnam 352 0 0 %
## 15 Macau 289 0 0 %
## 16 France 283 11 3.89 %
## 17 Iran 252 49 19.44 %
## 18 United Arab Emirates 214 0 0 %
## 19 Canada 188 0 0 %
## 20 UK 173 0 0 %
## 21 India 74 0 0 %
## 22 Philippines 70 24 34.29 %
## 23 Russia 52 0 0 %
## 24 Spain 46 0 0 %
## 25 Nepal 32 0 0 %
## 26 Cambodia 30 0 0 %
## 27 Sri Lanka 30 0 0 %
## 28 Finland 28 0 0 %
## 29 Sweden 26 0 0 %
## 30 Bahrain 24 0 0 %
## 31 Belgium 22 0 0 %
## 32 Egypt 12 0 0 %
## 33 Kuwait 12 0 0 %
## 34 Israel 5 0 0 %
## 35 Lebanon 5 0 0 %
## 36 Oman 4 0 0 %
## 37 Afghanistan 2 0 0 %
## 38 Austria 2 0 0 %
## 39 Iraq 2 0 0 %
## 40 Algeria 1 0 0 %
## 41 Croatia 1 0 0 %
## 42 Ivory Coast 1 0 0 %
## 43 Switzerland 1 0 0 %
## 44 Brazil 0 0 <NA>
## 45 Colombia 0 0 <NA>
## 46 Mexico 0 0 <NA>