Demo20230816

tidyverse

#install.packages('tidyverse')
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readr)
# Load the exercise data set from the course repository. report_id is read as
# text (the compact spec "c" is shorthand for col_character()); the other
# columns are left to readr's default type handling.
exercise_sample <- read_csv(
  file = "https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/exercise_sample.csv",
  col_types = cols(report_id = "c")
)
# Preview the first rows of the data.
head(exercise_sample)

## # A tibble: 6 × 19
##   report_id      age 是否境外移入 GENDER 通報日              發病日             
##   <chr>        <dbl> <chr>        <chr>  <dttm>              <dttm>             
## 1 10000590510…    55 是           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 2 10000018414…    10 否           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 3 10000050846…    35 否           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 4 10000663290…    45 否           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 5 10000784503…    35 否           M      2023-02-18 00:00:00 2023-02-18 00:00:00
## 6 10000396145…    35 否           M      2023-02-18 00:00:00 2023-02-18 00:00:00
## # ℹ 13 more variables: 研判日 <dttm>, 縣市 <chr>, 鄉鎮市區 <chr>, 職業 <chr>,
## #   有無慢性疾病病史及相關危險因子 <chr>, 是否使用ECMO <chr>,
## #   `首次使>用ECMO日` <lgl>, 是否插管 <chr>, 首次插管日 <lgl>,
## #   首次入住加護病房日 <chr>, 快篩 <chr>, PCR <chr>, CT值 <lgl>

# Base R row subsetting with a logical mask: keep the rows whose age is 60 or
# older. NOTE(review): this uses `>=`, while the dplyr examples that follow use
# `age > 60`, which is why this call prints 10 rows and filter() prints 6.
exercise_sample[exercise_sample$age >= 60, ]

## # A tibble: 10 × 19
##    report_id     age 是否境外移入 GENDER 通報日              發病日             
##    <chr>       <dbl> <chr>        <chr>  <dttm>              <dttm>             
##  1 1000011467…    95 否           F      2023-02-19 00:00:00 2023-02-19 00:00:00
##  2 1000009427…    70 否           M      2023-02-21 00:00:00 2023-02-21 00:00:00
##  3 1000053363…    60 否           M      2023-02-22 00:00:00 2023-02-22 00:00:00
##  4 1000090701…    75 否           M      2023-02-22 00:00:00 2023-02-22 00:00:00
##  5 1000019326…    60 否           M      2023-02-23 00:00:00 2023-02-23 00:00:00
##  6 1000023009…    60 否           F      2023-02-26 00:00:00 2023-02-26 00:00:00
##  7 1000027849…    70 否           F      2023-02-27 00:00:00 2023-02-27 00:00:00
##  8 1000092338…    60 否           M      2023-02-27 00:00:00 2023-02-27 00:00:00
##  9 1000035867…    75 否           M      2023-03-01 00:00:00 2023-03-01 00:00:00
## 10 1000081792…    70 否           M      2023-03-03 00:00:00 2023-03-03 00:00:00
## # ℹ 13 more variables: 研判日 <dttm>, 縣市 <chr>, 鄉鎮市區 <chr>, 職業 <chr>,
## #   有無慢性疾病病史及相關危險因子 <chr>, 是否使用ECMO <chr>,
## #   `首次使>用ECMO日` <lgl>, 是否插管 <chr>, 首次插管日 <lgl>,
## #   首次入住加護病房日 <chr>, 快篩 <chr>, PCR <chr>, CT值 <lgl>

filter(exercise_sample, age > 60)

## # A tibble: 6 × 19
##   report_id      age 是否境外移入 GENDER 通報日              發病日             
##   <chr>        <dbl> <chr>        <chr>  <dttm>              <dttm>             
## 1 10000114675…    95 否           F      2023-02-19 00:00:00 2023-02-19 00:00:00
## 2 10000094271…    70 否           M      2023-02-21 00:00:00 2023-02-21 00:00:00
## 3 10000907018…    75 否           M      2023-02-22 00:00:00 2023-02-22 00:00:00
## 4 10000278493…    70 否           F      2023-02-27 00:00:00 2023-02-27 00:00:00
## 5 10000358675…    75 否           M      2023-03-01 00:00:00 2023-03-01 00:00:00
## 6 10000817923…    70 否           M      2023-03-03 00:00:00 2023-03-03 00:00:00
## # ℹ 13 more variables: 研判日 <dttm>, 縣市 <chr>, 鄉鎮市區 <chr>, 職業 <chr>,
## #   有無慢性疾病病史及相關危險因子 <chr>, 是否使用ECMO <chr>,
## #   `首次使>用ECMO日` <lgl>, 是否插管 <chr>, 首次插管日 <lgl>,
## #   首次入住加護病房日 <chr>, 快篩 <chr>, PCR <chr>, CT值 <lgl>

filter(exercise_sample, age > 60, 有無慢性疾病病史及相關危險因子 == '無')

## # A tibble: 3 × 19
##   report_id      age 是否境外移入 GENDER 通報日              發病日             
##   <chr>        <dbl> <chr>        <chr>  <dttm>              <dttm>             
## 1 10000094271…    70 否           M      2023-02-21 00:00:00 2023-02-21 00:00:00
## 2 10000278493…    70 否           F      2023-02-27 00:00:00 2023-02-27 00:00:00
## 3 10000817923…    70 否           M      2023-03-03 00:00:00 2023-03-03 00:00:00
## # ℹ 13 more variables: 研判日 <dttm>, 縣市 <chr>, 鄉鎮市區 <chr>, 職業 <chr>,
## #   有無慢性疾病病史及相關危險因子 <chr>, 是否使用ECMO <chr>,
## #   `首次使>用ECMO日` <lgl>, 是否插管 <chr>, 首次插管日 <lgl>,
## #   首次入住加護病房日 <chr>, 快篩 <chr>, PCR <chr>, CT值 <lgl>

exercise_sample %>% 
  filter(age > 60, 有無慢性疾病病史及相關危險因子 == '無')

## # A tibble: 3 × 19
##   report_id      age 是否境外移入 GENDER 通報日              發病日             
##   <chr>        <dbl> <chr>        <chr>  <dttm>              <dttm>             
## 1 10000094271…    70 否           M      2023-02-21 00:00:00 2023-02-21 00:00:00
## 2 10000278493…    70 否           F      2023-02-27 00:00:00 2023-02-27 00:00:00
## 3 10000817923…    70 否           M      2023-03-03 00:00:00 2023-03-03 00:00:00
## # ℹ 13 more variables: 研判日 <dttm>, 縣市 <chr>, 鄉鎮市區 <chr>, 職業 <chr>,
## #   有無慢性疾病病史及相關危險因子 <chr>, 是否使用ECMO <chr>,
## #   `首次使>用ECMO日` <lgl>, 是否插管 <chr>, 首次插管日 <lgl>,
## #   首次入住加護病房日 <chr>, 快篩 <chr>, PCR <chr>, CT值 <lgl>

exercise_sample[ , c('report_id', 'age')]

## # A tibble: 100 × 2
##    report_id        age
##    <chr>          <dbl>
##  1 10000590510349    55
##  2 10000018414615    10
##  3 10000050846608    35
##  4 10000663290039    45
##  5 10000784503625    35
##  6 10000396145701    35
##  7 10000898175149    20
##  8 10000058819611    30
##  9 10000114675078    95
## 10 10000717119123    15
## # ℹ 90 more rows

select(exercise_sample, c('report_id', 'age'))

## # A tibble: 100 × 2
##    report_id        age
##    <chr>          <dbl>
##  1 10000590510349    55
##  2 10000018414615    10
##  3 10000050846608    35
##  4 10000663290039    45
##  5 10000784503625    35
##  6 10000396145701    35
##  7 10000898175149    20
##  8 10000058819611    30
##  9 10000114675078    95
## 10 10000717119123    15
## # ℹ 90 more rows

# SELECT report_id, age FROM exercise_sample WHERE age > 60 AND 有無慢性疾病病史及相關危險因子 = '無'
exercise_sample %>%
  filter(age > 60, 有無慢性疾病病史及相關危險因子 == '無') %>%
  select(c('report_id', 'age'))

## # A tibble: 3 × 2
##   report_id        age
##   <chr>          <dbl>
## 1 10000094271942    70
## 2 10000278493497    70
## 3 10000817923826    70

#exercise_sample %>%
#  select(c('report_id', 'age')) %>%
#  filter(age > 60, 有無慢性疾病病史及相關危險因子 == '無')

練習題

請使用 dplyr 從 COVID 資料集中篩選出年齡超過 60 歲（age）、且「有無慢性疾病病史及相關危險因子」為「無」的 report_id。

exercise_sample %>%
  filter(age > 60  ,  有無慢性疾病病史及相關危險因子 == '無') %>%
  select(report_id)

## # A tibble: 3 × 1
##   report_id     
##   <chr>         
## 1 10000094271942
## 2 10000278493497
## 3 10000817923826

a <- c(2,3,1,5,6,4)
sum(a[a > 3])

## [1] 15

a %>% 
  .[a > 3] %>% 
  sum()

## [1] 15

exercise_sample %>%
  filter((age > 60)  |  (有無慢性疾病病史及相關危險因子 == '無')) %>%
  select(report_id)

## # A tibble: 83 × 1
##    report_id     
##    <chr>         
##  1 10000590510349
##  2 10000018414615
##  3 10000050846608
##  4 10000784503625
##  5 10000058819611
##  6 10000114675078
##  7 10000717119123
##  8 10000512983146
##  9 10000675490671
## 10 10000349588450
## # ℹ 73 more rows

exercise_sample %>%
  filter((age > 60)  &  (有無慢性疾病病史及相關危險因子 == '無')) %>%
  select(report_id)

## # A tibble: 3 × 1
##   report_id     
##   <chr>         
## 1 10000094271942
## 2 10000278493497
## 3 10000817923826

exercise_sample %>%
  filter(!(age > 60)  &  (有無慢性疾病病史及相關危險因子 == '無')) %>%
  select(report_id)

## # A tibble: 77 × 1
##    report_id     
##    <chr>         
##  1 10000590510349
##  2 10000018414615
##  3 10000050846608
##  4 10000784503625
##  5 10000058819611
##  6 10000717119123
##  7 10000512983146
##  8 10000675490671
##  9 10000349588450
## 10 10000679026533
## # ℹ 67 more rows

a <- c(3,2,5,4,6,7,1)

sum(sort(a)[1:3])

## [1] 6

a %>%
  sort() %>%
  .[1:3] %>%
  sum()

## [1] 6

exercise_sample %>%
  select(report_id, age, 職業) %>%
  filter(age > 60) %>%
  arrange(age)

## # A tibble: 6 × 3
##   report_id        age 職業 
##   <chr>          <dbl> <chr>
## 1 10000094271942    70 <NA> 
## 2 10000278493497    70 <NA> 
## 3 10000817923826    70 <NA> 
## 4 10000907018321    75 <NA> 
## 5 10000358675702    75 無業 
## 6 10000114675078    95 <NA>

exercise_sample %>%
  select(report_id, age, 職業) %>%
  filter(age > 60) %>%
  arrange(desc(age))

## # A tibble: 6 × 3
##   report_id        age 職業 
##   <chr>          <dbl> <chr>
## 1 10000114675078    95 <NA> 
## 2 10000907018321    75 <NA> 
## 3 10000358675702    75 無業 
## 4 10000094271942    70 <NA> 
## 5 10000278493497    70 <NA> 
## 6 10000817923826    70 <NA>

exercise_sample %>%
  select(report_id, age, 職業) %>%
  filter(age > 60) %>%
  arrange(desc(age)) %>%
  head()

## # A tibble: 6 × 3
##   report_id        age 職業 
##   <chr>          <dbl> <chr>
## 1 10000114675078    95 <NA> 
## 2 10000907018321    75 <NA> 
## 3 10000358675702    75 無業 
## 4 10000094271942    70 <NA> 
## 5 10000278493497    70 <NA> 
## 6 10000817923826    70 <NA>

exercise_sample %>%
  select(report_id, age, 職業) %>%
  filter(age > 60) %>%
  arrange(desc(age)) %>%
  slice(1:10)

## # A tibble: 6 × 3
##   report_id        age 職業 
##   <chr>          <dbl> <chr>
## 1 10000114675078    95 <NA> 
## 2 10000907018321    75 <NA> 
## 3 10000358675702    75 無業 
## 4 10000094271942    70 <NA> 
## 5 10000278493497    70 <NA> 
## 6 10000817923826    70 <NA>

# Replace missing occupations with the label '無', leaving every other value
# untouched, then preview the updated data.
exercise_sample <- mutate(exercise_sample, 職業 = replace_na(職業, '無'))

head(exercise_sample)

## # A tibble: 6 × 19
##   report_id      age 是否境外移入 GENDER 通報日              發病日             
##   <chr>        <dbl> <chr>        <chr>  <dttm>              <dttm>             
## 1 10000590510…    55 是           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 2 10000018414…    10 否           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 3 10000050846…    35 否           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 4 10000663290…    45 否           F      2023-02-18 00:00:00 2023-02-18 00:00:00
## 5 10000784503…    35 否           M      2023-02-18 00:00:00 2023-02-18 00:00:00
## 6 10000396145…    35 否           M      2023-02-18 00:00:00 2023-02-18 00:00:00
## # ℹ 13 more variables: 研判日 <dttm>, 縣市 <chr>, 鄉鎮市區 <chr>, 職業 <chr>,
## #   有無慢性疾病病史及相關危險因子 <chr>, 是否使用ECMO <chr>,
## #   `首次使>用ECMO日` <lgl>, 是否插管 <chr>, 首次插管日 <lgl>,
## #   首次入住加護病房日 <chr>, 快篩 <chr>, PCR <chr>, CT值 <lgl>

a <- c(1,2,3,NA, 4,5, NA)
mean(a)

## [1] NA

?mean

mean(a, na.rm = TRUE)

## [1] 3

# SELECT 縣市, AVG(age) AS 平均年齡 FROM exercise_sample GROUP BY 縣市 ORDER BY 平均年齡

exercise_sample %>%
  group_by(縣市) %>%
  summarise(平均年齡 = mean(age, na.rm=TRUE)) %>%
  arrange(平均年齡)

## # A tibble: 9 × 2
##   縣市   平均年齡
##   <chr>     <dbl>
## 1 屏東縣     20  
## 2 台中市     25  
## 3 台南市     25  
## 4 嘉義縣     25  
## 5 雲林縣     25  
## 6 桃園市     30  
## 7 新北市     31  
## 8 連江縣     31.8
## 9 高雄市     32.5

練習題

假設有一份 COVID-19 資料集，其中記錄了多位患者的基本資料，包括姓名、年齡、性別等。現在，請你寫一個程式，根據欄位「性別」（GENDER）與「年齡」（age），統計各性別病患的平均年齡。

# SELECT GENDER, AVG(age) FROM exercise_sample GROUP BY GENDER;
exercise_sample %>%
  group_by(GENDER) %>%
  summarize(mean_age = mean(age, na.rm= TRUE))

## # A tibble: 2 × 2
##   GENDER mean_age
##   <chr>     <dbl>
## 1 F          30.2
## 2 M          32.3

str(exercise_sample)

## tibble [100 × 19] (S3: tbl_df/tbl/data.frame)
##  $ report_id                     : chr [1:100] "10000590510349" "10000018414615" "10000050846608" "10000663290039" ...
##  $ age                           : num [1:100] 55 10 35 45 35 35 20 30 95 15 ...
##  $ 是否境外移入                  : chr [1:100] "是" "否" "否" "否" ...
##  $ GENDER                        : chr [1:100] "F" "F" "F" "F" ...
##  $ 通報日                        : POSIXct[1:100], format: "2023-02-18" "2023-02-18" ...
##  $ 發病日                        : POSIXct[1:100], format: "2023-02-18" "2023-02-18" ...
##  $ 研判日                        : POSIXct[1:100], format: "2023-02-18 02:35:16" "2023-02-18 02:35:12" ...
##  $ 縣市                          : chr [1:100] "連江縣" "連江縣" "連江縣" "連江縣" ...
##  $ 鄉鎮市區                      : chr [1:100] "北竿鄉" "北竿鄉" "北竿鄉" "北竿鄉" ...
##  $ 職業                          : chr [1:100] "無" "無" "無" "無" ...
##  $ 有無慢性疾病病史及相關危險因子: chr [1:100] "無" "無" "無" "有" ...
##  $ 是否使用ECMO                  : chr [1:100] "否" "否" "否" "否" ...
##  $ 首次使>用ECMO日               : logi [1:100] NA NA NA NA NA NA ...
##  $ 是否插管                      : chr [1:100] "否" "否" "否" "否" ...
##  $ 首次插管日                    : logi [1:100] NA NA NA NA NA NA ...
##  $ 首次入住加護病房日            : chr [1:100] "無入住" "無入住" "無入住" "無入住" ...
##  $ 快篩                          : chr [1:100] "陽性" "陽性" "陽性" "陽性" ...
##  $ PCR                           : chr [1:100] "未檢驗" "未檢驗" "未檢驗" "未檢驗" ...
##  $ CT值                          : logi [1:100] NA NA NA NA NA NA ...

mean(exercise_sample$age)

## [1] 31.25

median(exercise_sample$age)

## [1] 30

exercise_sample %>%
  group_by(GENDER) %>%
  summarize(mean_age = mean(age, na.rm= TRUE))

## # A tibble: 2 × 2
##   GENDER mean_age
##   <chr>     <dbl>
## 1 F          30.2
## 2 M          32.3

exercise_sample %>%
  group_by(GENDER) %>%
  summarize(median_age = median(age, na.rm= TRUE))

## # A tibble: 2 × 2
##   GENDER median_age
##   <chr>       <dbl>
## 1 F              30
## 2 M              25

table(exercise_sample$GENDER)

## 
##  F  M 
## 50 50

table(exercise_sample$GENDER) / length(exercise_sample$GENDER)

## 
##   F   M 
## 0.5 0.5

barplot(table(exercise_sample$GENDER))

pie(table(exercise_sample$GENDER))

library(ggplot2)

# Count the number of reports for each gender with dplyr (one row per GENDER
# value, count stored in the 通報數量 column).
gender_counts <- count(exercise_sample, GENDER, name = "通報數量")


# Pie chart of report counts by gender with ggplot2: a single stacked bar
# (x = "") drawn in polar coordinates.
ggplot(gender_counts, aes(x = "", y = 通報數量, fill = GENDER)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  labs(
    title = "各性別通報數量比例",
    x = NULL,
    y = NULL,
    fill = "GENDER"
  ) +
  theme_minimal() +
  theme(legend.position = "right")

# Same gender pie chart, with the start angle set to 0 and the axis titles,
# labels and ticks removed.
ggplot(gender_counts, aes(x = "", y = 通報數量, fill = GENDER)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  labs(
    title = "各性別通報數量比例",
    x = NULL,
    y = NULL,
    fill = "GENDER"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Two-colour palette for the dark theme. The first entry is also used as the
# background fill; both entries are the slice colours.
dark_palette <- c("#2C3E50", "#E74C3C")

# Gender pie chart restyled for dark mode: dark panel, plot and legend
# backgrounds, white text, no axis decorations, and a 1 cm margin on each side.
ggplot(gender_counts, aes(x = "", y = 通報數量, fill = GENDER)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = dark_palette) +
  labs(
    title = "各性別通報數量比例",
    x = NULL,
    y = NULL,
    fill = "GENDER"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.background = element_rect(fill = dark_palette[1]),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white"),
    plot.title = element_text(color = "white"),
    plot.background = element_rect(fill = dark_palette[1]),
    panel.background = element_rect(fill = dark_palette[1]),
    plot.margin = unit(rep(1, 4), "cm"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Interactive pie chart of report counts by gender; the title is applied in
# the same pipeline.
plot <- plot_ly(
  gender_counts,
  labels = ~GENDER,
  values = ~通報數量,
  type = "pie"
) %>%
  layout(title = "各性別通報數量比例")

# Render the interactive pie chart.
plot

a <-  c(150, 155, 160, 162, 168, 171, 173, 175, 178, 182, 185)
stem(a)

## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   15 | 05
##   16 | 028
##   17 | 1358
##   18 | 25

median(a)

## [1] 171

quantile(a, 0.5)

## 50% 
## 171

quantile(a,0.75)

##   75% 
## 176.5

quantile(a,0.25)

## 25% 
## 161

IQR(a)

## [1] 15.5

boxplot(exercise_sample$age)

# y = ax + b
# age = GENDER * a + b
boxplot(exercise_sample$age ~ exercise_sample$GENDER)


# Keep only records with a plausible age (<= 120) before plotting.
# The data loaded in this script is `exercise_sample`; `exercise` did not exist
# before this line, so it has to be derived from `exercise_sample`.
# filter() also drops rows whose age is NA instead of turning them into
# all-NA rows the way logical `[` indexing would.
exercise <- exercise_sample %>%
  filter(age <= 120)
#table(exercise$GENDER)
# Age distribution by gender (base graphics).
boxplot(exercise$age ~ exercise$GENDER)
#boxplot(exercise$age ~ exercise$縣市)

# Interactive box plot of age for each county/city.
boxplot_plotly <- exercise %>%
  plot_ly(x = ~縣市, y = ~age, type = "box") %>%
  layout(title = "各縣市年齡盒狀圖",
         xaxis = list(title = "縣市"),
         yaxis = list(title = "年齡"))
boxplot_plotly

sd(exercise_sample$age)

## [1] 18.87459

var(exercise_sample$age)

## [1] 356.25

library(ggplot2)

# 創建一個數據框
data <- data.frame(x = c(1, 2, 3, 4, 5),
         y = c(10, 8, 6, 4, 2))

# 創建基本散點圖
ggplot(data, aes(x = x, y = y)) +
 geom_point() +
 labs(title = "scatter plot", x = "X", y = "Y")

# Drop the time-of-day part of the report date, keeping only the date.
exercise_sample <- mutate(exercise_sample, 通報日 = as.Date(通報日))

# Number of reports per day, stored in the 每日數量 column.
daily_counts <- count(exercise_sample, 通報日, name = "每日數量")

# Print the daily counts.
daily_counts

## # A tibble: 26 × 2
##    通報日     每日數量
##    <date>        <int>
##  1 2023-02-14        1
##  2 2023-02-18        7
##  3 2023-02-19        2
##  4 2023-02-20        8
##  5 2023-02-21        2
##  6 2023-02-22        9
##  7 2023-02-23        3
##  8 2023-02-24        4
##  9 2023-02-25        2
## 10 2023-02-26        3
## # ℹ 16 more rows

# 使用 ggplot2 套件繪製折線圖
ggplot(daily_counts, aes(x = 通報日, y = 每日數量)) +
 geom_line() +
 labs(title = "Daily Report Count",
   x = "date",
   y = "Count") +
 theme_minimal()

# 使用 ggplot2 套件繪製折線圖
ggplot(daily_counts, aes(x = 通報日, y = 每日數量)) +
 geom_line() +
 labs(title = "Daily Report Count",
   x = "date",
   y = "Count") +
 theme_minimal() +
 theme(
   plot.title = element_text(hjust = 0.5)  # 將標題置中
 )

# Interactive line chart of the daily report counts; the title and axis labels
# are applied in the same pipeline.
plot <- plot_ly(
  daily_counts,
  x = ~通報日,
  y = ~每日數量,
  type = "scatter",
  mode = "lines"
) %>%
  layout(
    title = "每日通報數量",
    xaxis = list(title = "日期"),
    yaxis = list(title = "通報數量")
  )

# Render the interactive line chart.
plot

# Count the number of reports for each county/city with dplyr (count stored in
# the 通報數量 column).
city_counts <- count(exercise_sample, 縣市, name = "通報數量")

# Pie chart of report counts by county/city with ggplot2: a single stacked bar
# (x = "") drawn in polar coordinates.
ggplot(city_counts, aes(x = "", y = 通報數量, fill = 縣市)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  labs(
    title = "各縣市通報數量比例",
    x = NULL,
    y = NULL,
    fill = "縣市"
  ) +
  theme_minimal() +
  theme(legend.position = "right")

# Interactive pie chart of report counts by county/city; the title is applied
# in the same pipeline.
plot <- plot_ly(
  city_counts,
  labels = ~縣市,
  values = ~通報數量,
  type = "pie"
) %>%
  layout(title = "各縣市通報數量比例")

# Render the interactive pie chart.
plot

# Bar chart of report counts per county/city with ggplot2. reorder() sorts the
# bars from the largest count to the smallest; x labels are rotated 45 degrees.
ggplot(city_counts, aes(x = reorder(縣市, -通報數量), y = 通報數量, fill = 縣市)) +
  geom_bar(stat = "identity") +
  labs(title = "各縣市通報數量",
       x = "縣市",
       y = "通報數量",
       fill = "縣市") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Hide the legend. `guides(fill = FALSE)` has been deprecated since
  # ggplot2 3.3.4 and raises a warning; "none" is the supported value.
  guides(fill = "none")

# Bar chart of report counts per county/city with plot_ly, customising the
# x-axis ticks through tickvals and ticktext.
plot <- plot_ly(city_counts, x = ~縣市, y = ~通報數量, type = "bar") %>%
  layout(
    title = "各縣市通報數量",
    xaxis = list(
      title = "縣市",
      # On a categorical axis, tick positions are category names (or 0-based
      # indices). The 1-based seq_along() positions would attach each label to
      # the bar after the one it belongs to, so anchor the ticks by category
      # name instead.
      tickvals = ~縣市,
      ticktext = ~縣市
    ),
    yaxis = list(title = "通報數量")
  )

# Render the bar chart.
plot

Demo20230816

David Chiu

2023-08-16

tidyverse

練習題

練習題