curl::curl_download('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/cdc.Rdata', 'cdc.Rdata')
load('cdc.Rdata')
head(cdc)
## genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1 good 0 1 0 70 175 175 77 m
## 2 good 0 1 1 64 125 115 33 f
## 3 good 1 1 1 60 105 105 49 f
## 4 good 1 1 0 66 132 124 42 f
## 5 very good 0 1 0 61 150 130 55 f
## 6 very good 1 1 0 64 114 114 55 f
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ──────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
g <- ggplot(cdc)
g +
aes(x = weight, y= height, color = factor(gender)) +
geom_point()
g +
aes(x = weight, y= wtdesire, color = factor(gender)) +
geom_point() +
geom_smooth(method = 'lm') +
xlim(200,300) +
ylim(100,200)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16963 rows containing non-finite values (stat_smooth).
## Warning: Removed 16963 rows containing missing values (geom_point).
g +
aes(x = weight) +
geom_histogram(binwidth = 10, color ='black') +
xlim(0,1000)
## Warning: Removed 2 rows containing missing values (geom_bar).
g +
aes(x = weight) +
geom_histogram(binwidth = 10, color ='black') +
coord_cartesian(ylim =c(0,1000))
g +
aes(x = weight) +
geom_histogram(binwidth = 10, color ='black') +
xlim(400,500)
## Warning: Removed 19992 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
g +
aes(x = weight, y = wtdesire, color = factor(gender)) +
geom_point() +
geom_smooth(method = 'lm') +
facet_grid(gender ~ genhlth) +
coord_cartesian(ylim = c(100,200))
## `geom_smooth()` using formula 'y ~ x'
## 確診案例
confirmed_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
confirmed_file <- 'time_series_covid19_confirmed_global.csv'
download.file(confirmed_url, confirmed_file)
##死亡案例
deaths_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
deaths_file <- 'time_series_covid19_deaths_global.csv'
download.file(deaths_url, deaths_file)
## 康復案例
recovered_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
recovered_file <- 'time_series_covid19_recovered_global.csv'
download.file(recovered_url, recovered_file)
library(tidyverse)
recovered_df <- read_csv('time_series_covid19_recovered_global.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
recovered_df$Case <- 'recovered'
dim(recovered_df)
## [1] 253 222
deaths_df <- read_csv('time_series_covid19_deaths_global.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
deaths_df$Case <- 'deaths'
dim(deaths_df)
## [1] 266 222
confirmed_df <- read_csv('time_series_covid19_confirmed_global.csv')
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
confirmed_df$Case <- 'confirmed'
dim(confirmed_df)
## [1] 266 222
merged_df <- list(confirmed = confirmed_df, deaths = deaths_df, recovered = recovered_df)
covid19_ts_df <- do.call(rbind, merged_df)
dim(covid19_ts_df)
## [1] 785 222
col_names <- colnames(covid19_ts_df)
#col_names
date_cols <- col_names[5:(length(col_names) - 1) ]
#date_cols
covid19_tidy_df <- covid19_ts_df %>%
gather(Date, Case_Number, date_cols)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
head(covid19_tidy_df)
## # A tibble: 6 x 7
## `Province/State` `Country/Region` Lat Long Case Date Case_Number
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 confirmed 1/22/… 0
## 2 <NA> Albania 41.2 20.2 confirmed 1/22/… 0
## 3 <NA> Algeria 28.0 1.66 confirmed 1/22/… 0
## 4 <NA> Andorra 42.5 1.52 confirmed 1/22/… 0
## 5 <NA> Angola -11.2 17.9 confirmed 1/22/… 0
## 6 <NA> Antigua and Barbuda 17.1 -61.8 confirmed 1/22/… 0
covid19_tidy_df$Date <- as.Date(covid19_tidy_df$Date, format = '%m/%d/%y')
head(covid19_tidy_df)
## # A tibble: 6 x 7
## `Province/State` `Country/Region` Lat Long Case Date Case_Number
## <chr> <chr> <dbl> <dbl> <chr> <date> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 confi… 2020-01-22 0
## 2 <NA> Albania 41.2 20.2 confi… 2020-01-22 0
## 3 <NA> Algeria 28.0 1.66 confi… 2020-01-22 0
## 4 <NA> Andorra 42.5 1.52 confi… 2020-01-22 0
## 5 <NA> Angola -11.2 17.9 confi… 2020-01-22 0
## 6 <NA> Antigua and Barbu… 17.1 -61.8 confi… 2020-01-22 0
filtered_df <- covid19_tidy_df %>%
filter(`Country/Region` %in% c('US', 'Taiwan*'))
g <- ggplot(filtered_df)
#head(filtered_df)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
g +
aes(x = Date, y = Case_Number, color = factor(`Case`)) +
geom_line() +
facet_grid(`Country/Region` ~ `Case`) +
xlim(as.Date('2020-01-01'),as.Date('2020-09-01')) +
scale_y_continuous(labels = scales::comma) +
scale_y_log10()
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Warning: Transformation introduced infinite values in continuous y-axis
## Theme
library(showtext)
## Warning: package 'showtext' was built under R version 4.0.2
## Loading required package: sysfonts
## Warning: package 'sysfonts' was built under R version 4.0.2
## Loading required package: showtextdb
## Warning: package 'showtextdb' was built under R version 4.0.2
showtext_auto(enable = TRUE)
font_add("Microsoft Jhenghei", "Microsoft Jhenghei.ttf")
plot(runif(10), xlab = "橫軸", ylab = "縱軸", main = "中文標題",
family = "Microsoft Jhenghei")
text(5, 0.7, "Jhenghei", cex = 3, family = "Microsoft Jhenghei")
g +
aes(x = Date, y = Case_Number, color = factor(`Case`)) +
geom_line() +
facet_grid(`Country/Region` ~ `Case`) +
xlim(as.Date('2020-01-01'),as.Date('2020-09-01')) +
scale_y_continuous(labels = scales::comma) +
xlab('日期') +
ylab('數量') +
ggtitle('台灣 v.s. 美國案例數量')
g +
aes(x = Date, y = Case_Number, color = factor(`Case`)) +
geom_line() +
facet_grid(`Country/Region` ~ `Case`) +
xlim(as.Date('2020-01-01'),as.Date('2020-09-01')) +
scale_y_continuous(labels = scales::comma) +
xlab('日期') +
ylab('數量') +
ggtitle('台灣 v.s. 美國案例數量') +
theme(axis.title.x = element_text(color = 'DarkGreen', size = 10),
axis.title.y = element_text(color = 'Red', size = 10),
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 20, hjust = 0.5))
w <- ggplot(cdc)
w1 <- w + aes(x = weight, y = wtdesire, color = gender) + geom_point() + xlab('體重') + ylab('理想體重') + ggtitle('體重 v.s. 理想體重') +
scale_color_manual(name ='性別',labels = c("MALE", "FEMALE"), values = c("blue", "red")) +
theme(legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
legend.position = c(1,1),
legend.justification = c(1,1))
w1
ggsave(w1, file= 'test.png')
## Saving 7 x 5 in image
ggsave(w1, file="test2.png", width=6, height=4)
filtered_df <- covid19_tidy_df %>%
filter(`Country/Region` == 'Taiwan*')
g2 <- g <- ggplot(filtered_df) +
aes(x = Date, y = Case_Number, color = `Case`) +
geom_line() +
facet_grid(`Case`~.) +
xlab('日期') +
ylab('個案數量') +
ggtitle('台灣近期新冠肺炎個案人數') +
scale_color_manual(name ='個案情況',labels = c("確診", "死亡", '康復'), values = c('blue', 'green', 'red')) +
theme(plot.title = element_text(size = 20, hjust = 0.5)
)
#g2
ggsave(g2, file = 'taiwan_covid19.png')
## Saving 7 x 5 in image
#install.packages("dostats")
#library(dostats)
user_case_staging <- data.frame(感染國家 = c('TW', 'US', NA, 'US'), 備註= c(NA, '加州', '美國', '費城'))
match('美國', user_case_staging$備註)
## [1] 3
user_case_staging$感染國家[grepl('美國', user_case_staging$備註)] = 'US'
user_case_production <- user_case_staging
user_case_staging <- data.frame(感染國家 = c('TW', 'US', NA, 'US'), 備註= c(NA, '加州', '美國', '費城'))
user_case_production <- user_case_staging %>%
mutate(個案國家 = ifelse(((`感染國家` == 'US') | grepl('美國',`備註`)), 'US', `感染國家` ))
user_case_production
## 感染國家 備註 個案國家
## 1 TW <NA> TW
## 2 US 加州 US
## 3 <NA> 美國 US
## 4 US 費城 US
##ggmap
#install.packages('ggmap')
#library(ggmap)
#?register_google
#map <- get_map(location = 'Taiwan', zoom = 7)
#ggmap(map)
#install.packages('leaflet')
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.0.2
library(leaflet)
m <- leaflet() %>%
addTiles() %>% # Add default OpenStreetMap map tiles
addMarkers(lng=121, lat=25, popup="Taiwan")
max(covid19_tidy_df$Date)
## [1] "2020-08-25"
covid19_lastest_confirmed_df <-
covid19_tidy_df %>%
filter(`Date` == '2020-08-25', `Case` == 'confirmed')
head(covid19_lastest_confirmed_df)
## # A tibble: 6 x 7
## `Province/State` `Country/Region` Lat Long Case Date Case_Number
## <chr> <chr> <dbl> <dbl> <chr> <date> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 confi… 2020-08-25 38070
## 2 <NA> Albania 41.2 20.2 confi… 2020-08-25 8759
## 3 <NA> Algeria 28.0 1.66 confi… 2020-08-25 42228
## 4 <NA> Andorra 42.5 1.52 confi… 2020-08-25 1060
## 5 <NA> Angola -11.2 17.9 confi… 2020-08-25 2283
## 6 <NA> Antigua and Barbu… 17.1 -61.8 confi… 2020-08-25 94
?addCircleMarkers
leaflet(covid19_lastest_confirmed_df) %>% addTiles() %>%
addCircleMarkers(
lat = covid19_lastest_confirmed_df$Lat,
lng = covid19_lastest_confirmed_df$Long,
radius = log(covid19_lastest_confirmed_df$Case_Number),
color = ifelse(covid19_lastest_confirmed_df$Case_Number >=1000000 , 'red', 'blue'),
stroke = FALSE,
fillOpacity = 0.5
)
dengue <- read_csv('https://od.cdc.gov.tw/eic/Dengue_Daily_last12m.csv')
## Parsed with column specification:
## cols(
## .default = col_character(),
## 發病日 = col_date(format = ""),
## 個案研判日 = col_date(format = ""),
## 通報日 = col_date(format = ""),
## 確定病例數 = col_double(),
## 內政部居住縣市代碼 = col_double(),
## 內政部居住鄉鎮代碼 = col_double()
## )
## See spec(...) for full column specifications.
head(dengue)
## # A tibble: 6 x 26
## 發病日 個案研判日 通報日 性別 年齡層 居住縣市 居住鄉鎮 居住村里
## <date> <date> <date> <chr> <chr> <chr> <chr> <chr>
## 1 2019-08-01 2019-08-03 2019-08-02 男 35-39 高雄市 苓雅區 福康里
## 2 2019-08-01 2019-08-03 2019-08-02 男 15-19 桃園市 龜山區 陸光里
## 3 2019-08-01 2019-08-03 2019-08-02 女 40-44 桃園市 觀音區 樹林里
## 4 2019-08-01 2019-08-06 2019-08-06 男 45-49 高雄市 仁武區 八卦里
## 5 2019-08-01 2019-08-13 2019-08-06 女 15-19 高雄市 左營區 新下里
## 6 2019-08-02 2019-08-03 2019-08-02 女 45-49 桃園市 龜山區 陸光里
## # … with 18 more variables: 最小統計區 <chr>, 最小統計區中心點X <chr>,
## # 最小統計區中心點Y <chr>, 一級統計區 <chr>, 二級統計區 <chr>,
## # 感染縣市 <chr>, 感染鄉鎮 <chr>, 感染村里 <chr>, 是否境外移入 <chr>,
## # 感染國家 <chr>, 確定病例數 <dbl>, 居住村里代碼 <chr>, 感染村里代碼 <chr>,
## # 血清型 <chr>, 內政部居住縣市代碼 <dbl>, 內政部居住鄉鎮代碼 <dbl>,
## # 內政部感染縣市代碼 <chr>, 內政部感染鄉鎮代碼 <chr>
dengue$最小統計區中心點X <- as.double(dengue$最小統計區中心點X)
## Warning: NAs introduced by coercion
dengue$最小統計區中心點Y <- as.double(dengue$最小統計區中心點Y)
## Warning: NAs introduced by coercion
formattext <- function(gender, age, type){
sprintf('性別:%s<br>年紀:%s<br>型別:%s', gender, age, type)
}
leaflet(dengue) %>% addTiles() %>% addMarkers(
lat = dengue$最小統計區中心點Y,
lng = dengue$最小統計區中心點X,
label=as.character(dengue$血清型),
popup = as.character(formattext(dengue$性別, dengue$年齡層,dengue$血清型)),
clusterOptions = markerClusterOptions()
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
leaflet(dengue) %>% addProviderTiles(providers$CartoDB.Positron) %>% addMarkers(
lat = dengue$最小統計區中心點Y,
lng = dengue$最小統計區中心點X,
label=as.character(dengue$血清型),
popup = as.character(formattext(dengue$性別, dengue$年齡層,dengue$血清型)),
clusterOptions = markerClusterOptions()
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
download.file('https://od.cdc.gov.tw/eic/DengueCluster_Taoyuan.geojson', 'DengueCluster_Taoyuan.geojson')
#install.packages('geojsonio')
library(geojsonio)
## Warning: package 'geojsonio' was built under R version 4.0.2
##
## Attaching package: 'geojsonio'
## The following object is masked from 'package:base':
##
## pretty
dengue_geo <- geojsonio::geojson_read("DengueCluster_Taoyuan.geojson", what = "sp")
leaflet(dengue_geo) %>%
addTiles() %>%
addPolygons(stroke = FALSE, smoothFactor = 0.3, fillOpacity = 0.5,
fillColor = 'red')
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
taiwan_filtered_df <- covid19_tidy_df %>%
filter((`Country/Region` == 'Taiwan*') & (`Case` == 'confirmed') )
#taiwan_filtered_df
fig<-plot_ly(taiwan_filtered_df, x = ~Date, y = ~Case_Number, type = 'scatter', mode = 'lines+markers')
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
taiwan_filtered_df <- covid19_tidy_df %>%
filter((`Country/Region` == 'Taiwan*') )
head(taiwan_filtered_df)
## # A tibble: 6 x 7
## `Province/State` `Country/Region` Lat Long Case Date Case_Number
## <chr> <chr> <dbl> <dbl> <chr> <date> <dbl>
## 1 <NA> Taiwan* 23.7 121 confirmed 2020-01-22 1
## 2 <NA> Taiwan* 23.7 121 deaths 2020-01-22 0
## 3 <NA> Taiwan* 23.7 121 recovered 2020-01-22 0
## 4 <NA> Taiwan* 23.7 121 confirmed 2020-01-23 1
## 5 <NA> Taiwan* 23.7 121 deaths 2020-01-23 0
## 6 <NA> Taiwan* 23.7 121 recovered 2020-01-23 0
taiwan_spread_df<-taiwan_filtered_df %>%
spread(key = `Case`, value = `Case_Number`)
fig<-plot_ly(taiwan_spread_df, x = ~Date, y = ~confirmed, type = 'scatter', mode = 'lines+markers', name='確診')
fig %>%
add_trace(y = ~recovered, mode = 'lines+markers', name = '康復') %>%
add_trace(y = ~deaths, mode = 'lines+markers', name = '死亡')
#?spread
library(plotly)
ds <- data.frame(labels = c("A", "B", "C"),
values = c(10, 20, 30))
plot_ly(ds, labels = ds$labels, values = ds$values, type = "pie", hole=0.6) %>%
layout(title = "Donut Chart Example")
library(plotly)
month<- c(1,2,3,4,5)
taipei <- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%
add_trace(x = month, y = tainan, fill = "tozeroy",name="tainan") %>%
layout(yaxis = list(title = 'rainfall') )
library(plotly)
month<- c(1,2,3,4,5)
taipei<- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
total <- taipei + tainan
y <- list(title="Rainfall")
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%
add_trace(x = month, y = total, fill = "tonexty", name="tainan")
data("diamonds")
head(diamonds)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
d <- diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(d, x = d$carat, y= d$price, color = d$clarity, type='scatter', mode= 'markers', size = d$carat, text= paste("Clarity", d$clarity))
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
p <- subplot(
plot_ly(taiwan_spread_df, x = ~Date, y = ~confirmed, type='scatter', mode = 'lines'),
plot_ly(taiwan_spread_df, x = ~Date, y = ~recovered, type='scatter', mode = 'lines'),
plot_ly(taiwan_spread_df, x = ~Date, y = ~deaths, type='scatter', mode = 'lines'),
margin=0.05
)
p %>% layout(showlegend=FALSE)
p <- subplot(
plot_ly(taiwan_spread_df, x = ~Date, y = ~confirmed, type='scatter', mode = 'lines'),
plot_ly(taiwan_spread_df, x = ~Date, y = ~recovered, type='scatter', mode = 'lines'),
plot_ly(taiwan_spread_df, x = ~Date, y = ~deaths, type='scatter', mode = 'lines'),
margin=0.05,
nrows = 3
)
p %>% layout(showlegend=FALSE)
filtered_df <- covid19_tidy_df %>%
filter(`Country/Region` == 'Taiwan*')
g2 <- ggplot(filtered_df) +
aes(x = Date, y = Case_Number, color = `Case`) +
geom_line() +
facet_grid(`Case`~.) +
xlab('日期') +
ylab('個案數量') +
ggtitle('台灣近期新冠肺炎個案人數') +
scale_color_manual(name ='個案情況',labels = c("確診", "死亡", '康復'), values = c('blue', 'green', 'red')) +
theme(plot.title = element_text(size = 20, hjust = 0.5)
)
fig <- ggplotly(g2)
fig
top10 <- covid19_tidy_df %>%
filter((`Date` == '2020-08-25') & `Case` == 'confirmed') %>%
arrange(desc(Case_Number)) %>%
head(10)
library(scales)
g <- ggplot(top10) +
geom_bar(aes(x=reorder(`Country/Region`, -Case_Number),y=Case_Number),stat="identity", fill= 'blue') +
xlab('國家') +
ylab('確診數') +
ggtitle('確診數排名前10國家') +
scale_y_continuous(labels = scales::comma)
fig <- ggplotly(g)
fig