\(~\)
1. CARREGAR PACOTES
library(tidyverse)
library(data.table)
library(lubridate)
library(hablar)
library(openxlsx)
library(forecast)
#library(coronabr)
library(brazilmaps)
library(sf)
library(leaflet)
library(xts)
library(dygraphs)
#library(fable)
\(~\)
2. IMPORTAR DADOS
# Read the remote `database.js` script, strip its "var database=" prefix,
# parse the remainder as JSON and keep the "brazil" element. Each nested
# `values` table is converted to character columns, unnested into rows, and
# `date` is parsed as day-month-year.
dados <- readr::read_file(
  "http://plataforma.saude.gov.br/novocoronavirus/resources/scripts/database.js"
) %>%
  stringr::str_remove("var database=") %>%
  jsonlite::fromJSON() %>%
  purrr::pluck("brazil") %>%
  dplyr::mutate(
    values = purrr::map(values, dplyr::mutate_all, as.character)
  ) %>%
  tidyr::unnest(values) %>%
  dplyr::mutate(date = lubridate::dmy(date))

# NOTE(review): this assignment replaces the table downloaded above with the
# contents of a local CSV, so the download result is never used downstream.
# The path is absolute and machine-specific -- confirm which source is the
# intended one.
dados <- fread("C:/Users/Ronaldo Alves/Desktop/COVID_19/coronavirus2.csv")
\(~\)
3. CURADORIA DOS DADOS
# Build a complete (uid, date) grid: every state code listed here paired with
# every day between the earliest and latest date found in `dados`.
uid <- data.frame(uid = c(11:17, 21:29, 31:33, 35, 41:43, 50:53))
uid <- setDT(uid)[,
  list(date = seq(min(ymd(dados$date)), max(ymd(dados$date)), by = "days")),
  by = uid
]

# Join the observations onto the grid so that missing (uid, date) pairs
# appear as rows, collapse duplicates with `sum_`, number the dates, fill the
# remaining numeric NAs with 0 and attach the two-letter state abbreviation.
dados <- dados %>%
  mutate(date = ymd(date)) %>% # uid = as.factor(uid)
  full_join(uid, by = c("uid", "date")) %>%
  group_by(uid, date) %>%
  summarise_all(sum_) %>%
  # summarise_all() drops only the `date` grouping level, so the rank below
  # is computed within each uid.
  mutate(id_date = dense_rank(date)) %>%
  arrange(uid, date) %>%
  ungroup() %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(
    # Positional lookup: the i-th abbreviation belongs to the i-th code;
    # codes that are not listed yield NA.
    sigla = c(
      "RO", "AC", "AM", "RR", "PA", "AP", "TO",
      "MA", "PI", "CE", "RN", "PB", "PE", "AL", "SE", "BA",
      "MG", "ES", "RJ", "SP",
      "PR", "SC", "RS",
      "MS", "MT", "GO", "DF"
    )[match(uid, c(11:17, 21:29, 31:33, 35, 41:43, 50:53))]
  ) %>%
  select(id_date, uid, sigla, everything())

# The helper grid is no longer needed; drop it so that `uid` only refers to
# the column from here on.
rm(uid)
dados
\(~\)
5. ANÁLISE DESCRITIVA
# Overall figures: sum of `cases`, sum of `deaths` and the number of distinct
# state codes in the curated table.
dados %>%
  summarise(
    confirmados = sum_(cases),
    mortes = sum_(deaths),
    estados = n_distinct(uid)
  )

# Sum of `cases` for each state (code + abbreviation).
dados_uf <- dados %>%
  group_by(uid, sigla) %>%
  summarise(confirmados = sum_(cases)) %>%
  ungroup()

# States ordered from the largest to the smallest total.
dados_uf %>%
  arrange(-confirmados) %>%
  select(sigla, confirmados)
\(~\)
6. ANÁLISE ESPACIAL
# Bar chart of the per-state totals, bars sorted in decreasing order and each
# bar annotated with its value just above the top edge.
ggplot(
  data = dados_uf,
  mapping = aes(x = reorder(sigla, -confirmados), y = confirmados)
) +
  geom_bar(stat = "identity", fill = "white", color = "black") +
  geom_text(
    aes(label = confirmados),
    position = "stack",
    vjust = -0.25,
    size = 3
  ) +
  # The axis runs 100 units past the tallest bar so its label is not clipped.
  scale_y_continuous(
    limits = c(0, max(dados_uf$confirmados) + 100),
    breaks = seq(0, max(dados_uf$confirmados) + 100, by = 250),
    expand = c(0.01, 0)
  ) +
  labs(x = "", y = "# casos confirmados") +
  theme_minimal()

# Attach the per-state totals to the state geometries and reduce every state
# to its centroid, so each one can be drawn as a single marker.
map_uf <- get_brmap("State") %>%
  left_join(dados_uf, by = c("State" = "uid")) %>%
  st_centroid()

# Interactive map: one circle per state that has a non-zero total, labelled
# with the abbreviation and the count.
data.frame(
  st_coordinates(map_uf),
  confirmados = map_uf$confirmados,
  UF = map_uf$nome,
  sigla = map_uf$sigla
) %>% # str_to_title()
  filter(confirmados != 0) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    ~X, ~Y,
    label = ~paste0(sigla, ":", confirmados, " casos"),
    labelOptions = labelOptions(textsize = "12px"), # noHide = T, textOnly = T, direction = "auto"
    # log1p() instead of log(): log(1) is 0, which gave states with exactly
    # one case a zero-radius (invisible) marker.
    radius = ~log1p(confirmados) * 2,
    stroke = FALSE,
    fillOpacity = 0.5
  )
\(~\)
7. ANÁLISE TEMPORAL
# Interactive time series of the daily sum of `cases` across all states.
dados %>%
  group_by(date) %>%
  summarise(N = sum_(cases)) %>%
  # The braces stop magrittr from also inserting the whole data frame as a
  # positional argument: `.` is only used in nested positions here, and
  # without braces the pipe would call xts(., x = .$N, order.by = .$date).
  {
    xts(x = .$N, order.by = .$date)
  } %>%
  dygraph(xlab = "Date of report", ylab = "# confirmed cases") %>%
  dyOptions(
    drawPoints = TRUE,
    pointSize = 4,
    fillGraph = TRUE,
    fillAlpha = 0.4
  ) %>%
  dyRangeSelector() %>%
  # The unnamed single series is addressed as "V1".
  dySeries("V1", label = "confirmed cases") %>%
  dyLegend(show = "auto", hideOnMouseOut = FALSE)
\(~\)
# Natural log of the daily sum of `cases` from 2020-02-26 onwards, with a
# straight-line least-squares fit drawn over the points.
dados %>%
  group_by(date) %>%
  summarise(N = sum_(cases)) %>%
  filter(date >= as.Date("2020-02-26")) %>%
  ggplot(mapping = aes(x = date, y = log(N))) +
  geom_point(shape = 21, size = 3, fill = "orange") +
  geom_smooth(method = "lm", formula = y ~ x, color = "red") +
  # One tick per day, formatted as day/abbreviated month.
  scale_x_date(breaks = "1 day", labels = scales::date_format("%d/%b")) +
  labs(x = "Date of report", y = "# confirmed cases (log scale)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.4))

\(~\)
# Linear regression of log10(daily sum of cases + 1) on the day index,
# using dates from 2020-02-26 onwards, followed by the model summary.
dados %>%
  group_by(date) %>%
  summarise(N = sum_(cases)) %>%
  filter(date >= "2020-02-26") %>%
  mutate(
    # +1 keeps the logarithm finite on days whose total is 0.
    y = log10(N + 1),
    # seq_len() rather than 1:nrow(.): the latter yields c(1, 0) when the
    # filter leaves no rows.
    days = seq_len(nrow(.))
  ) %>%
  lm(y ~ days, data = ., na.action = na.exclude) %>%
  summary()
Call:
lm(formula = y ~ days, data = ., na.action = na.exclude)
Residuals:
Min 1Q Median 3Q Max
-1.41457 0.01152 0.09367 0.12422 0.31453
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.1325 0.1536 -0.863 0.399
days 0.1190 0.0117 10.175 2.36e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3481 on 20 degrees of freedom
Multiple R-squared: 0.8381, Adjusted R-squared: 0.83
F-statistic: 103.5 on 1 and 20 DF, p-value: 2.363e-09
\(~\)
# Curves of `cases` over time for eight selected states, from 2020-02-26
# onwards; legend entries are ordered via reorder(sigla, -cases).
dados %>%
  # group_by(sigla, date) %>%
  # summarise(N = sum_(cases)) %>%
  filter(
    date >= as.Date("2020-02-26"),
    sigla %in% c("SP", "RJ", "DF", "RS", "PR", "PE", "MG", "SC")
  ) %>%
  ggplot(mapping = aes(x = date, y = cases, color = reorder(sigla, -cases))) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  scale_color_brewer(palette = "Spectral") +
  # One tick per day, formatted as day/abbreviated month.
  scale_x_date(breaks = "1 day", labels = scales::date_format("%d/%b")) +
  labs(x = "Date of report", y = "# confirmed cases", color = "UF") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.4))

\(~\)
