#Leer y describir los datos
#data <- read.csv("WHO.COVID.19.global.data.csv", header = TRUE, sep = ",")
library(readr)
# Load the WHO COVID-19 daily report with an explicit schema instead of
# letting readr guess the column types on every run. The types below are the
# ones readr inferred for this file; pinning them silences the
# column-specification message and makes readr report parsing problems if
# the file layout ever changes.
# `na = ""` treats only empty cells as missing. With readr's default
# `na = c("", "NA")`, a country code spelled "NA" (the ISO 3166 alpha-2 code
# of Namibia) would be read as a missing value.
# NOTE(review): confirm the file actually contains rows with that code.
covid <- read_csv(
  "data/WHO-COVID-19-global-data.csv",
  col_types = cols(
    Date_reported = col_date(format = ""),
    Country_code = col_character(),
    Country = col_character(),
    WHO_region = col_character(),
    New_cases = col_double(),
    Cumulative_cases = col_double(),
    New_deaths = col_double(),
    Cumulative_deaths = col_double()
  ),
  na = ""
)
##
## -- Column specification ------------------------------------------------
## cols(
## Date_reported = col_date(format = ""),
## Country_code = col_character(),
## Country = col_character(),
## WHO_region = col_character(),
## New_cases = col_double(),
## Cumulative_cases = col_double(),
## New_deaths = col_double(),
## Cumulative_deaths = col_double()
## )
# Preview the first six rows of the imported table.
head(x = covid, n = 6L)
## # A tibble: 6 x 8
## Date_reported Country_code Country WHO_region New_cases Cumulative_cases
## <date> <chr> <chr> <chr> <dbl> <dbl>
## 1 2020-01-03 AF Afghan~ EMRO 0 0
## 2 2020-01-04 AF Afghan~ EMRO 0 0
## 3 2020-01-05 AF Afghan~ EMRO 0 0
## 4 2020-01-06 AF Afghan~ EMRO 0 0
## 5 2020-01-07 AF Afghan~ EMRO 0 0
## 6 2020-01-08 AF Afghan~ EMRO 0 0
## # ... with 2 more variables: New_deaths <dbl>, Cumulative_deaths <dbl>
# Show the compact structure (dimensions, column types, first values).
str(object = covid)
## tibble [77,673 x 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Date_reported : Date[1:77673], format: "2020-01-03" "2020-01-04" ...
## $ Country_code : chr [1:77673] "AF" "AF" "AF" "AF" ...
## $ Country : chr [1:77673] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ WHO_region : chr [1:77673] "EMRO" "EMRO" "EMRO" "EMRO" ...
## $ New_cases : num [1:77673] 0 0 0 0 0 0 0 0 0 0 ...
## $ Cumulative_cases : num [1:77673] 0 0 0 0 0 0 0 0 0 0 ...
## $ New_deaths : num [1:77673] 0 0 0 0 0 0 0 0 0 0 ...
## $ Cumulative_deaths: num [1:77673] 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. Date_reported = col_date(format = ""),
## .. Country_code = col_character(),
## .. Country = col_character(),
## .. WHO_region = col_character(),
## .. New_cases = col_double(),
## .. Cumulative_cases = col_double(),
## .. New_deaths = col_double(),
## .. Cumulative_deaths = col_double()
## .. )
#Importar librería
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# Per-column summary statistics of the imported table.
summary(object = covid)
## Date_reported Country_code Country WHO_region
## Min. :2020-01-03 Length:77673 Length:77673 Length:77673
## 1st Qu.:2020-03-25 Class :character Class :character Class :character
## Median :2020-06-16 Mode :character Mode :character Mode :character
## Mean :2020-06-15
## 3rd Qu.:2020-09-06
## Max. :2020-11-28
## New_cases Cumulative_cases New_deaths Cumulative_deaths
## Min. :-32952.0 Min. : 0 Min. :-514.00 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0.00 1st Qu.: 0
## Median : 1.0 Median : 380 Median : 0.00 Median : 7
## Mean : 785.8 Mean : 65700 Mean : 18.45 Mean : 2146
## 3rd Qu.: 100.0 3rd Qu.: 8696 3rd Qu.: 1.00 3rd Qu.: 157
## Max. :322072.0 Max. :12763997 Max. :6409.00 Max. :261460
### Number of rows (reports) per reporting date
# geom_bar() counts the rows at each x value by default (stat = "count").
# The previous geom_histogram(stat = "count") drew the same chart but raised
# "Ignoring unknown parameters: binwidth, bins, pad", because those
# histogram-only parameters do not apply to the count stat.
hist <- ggplot(covid, aes(x = Date_reported))
hist <- hist + geom_bar()
hist
### New COVID cases over time, stacked by WHO region
# geom_col() plots the y values as given (same as geom_bar(stat = "identity")),
# so each date's bar is the stack of New_cases coloured by region.
# The x-axis labels are rotated and shrunk so the dates remain legible.
hist <- ggplot(covid, aes(Date_reported, New_cases, fill = WHO_region)) +
  geom_col() +
  labs(
    title = "Casos COVID por Región",
    x = "Fecha",
    y = "Conteo de casos"
  ) +
  theme(axis.text.x = element_text(angle = 90, size = 6, hjust = 1))
hist
### Daily new cases for Colombia
# Keep only the rows whose country code is "CO"; subset() evaluates the
# condition inside the data frame, so the column can be named directly.
data_colombia <- subset(covid, Country_code == "CO")

# One line of New_cases against the reporting date. Grouping and colouring
# by Country_code adds a legend entry for the country.
line <- ggplot(
  data_colombia,
  aes(
    x = Date_reported,
    y = New_cases,
    group = Country_code,
    color = Country_code
  )
) +
  geom_line() +
  labs(
    title = "Casos COVID para Colombia",
    x = "Fecha",
    y = "Conteo de casos"
  ) +
  theme(axis.text.x = element_text(angle = 90, size = 6, hjust = 1))
line