Q1: 針對各寄生蟲於各國的盛行率作簡單統計,包括平均數,最小值,最大值等

# Tag every dataset with the parasite it describes, stack the three tables,
# and keep only the rows that actually report a prevalence value.
combined_data <- bind_rows(
  data_schisto %>% mutate(Parasite = "Schistosoma mansoni"),
  data_hookworm %>% mutate(Parasite = "Hookworms"),
  data_ascaris %>% mutate(Parasite = "Ascaris")
) %>%
  filter(!is.na(Prevalence))

# Per-parasite descriptive statistics of prevalence: number of records, mean,
# median, minimum and maximum. The `.names` pattern yields the columns
# Mean_Prevalence, Median_Prevalence, Min_Prevalence and Max_Prevalence.
Pre_summary <- combined_data %>%
  group_by(Parasite) %>%
  summarise(
    Count = n(),
    across(
      Prevalence,
      list(Mean = mean, Median = median, Min = min, Max = max),
      .names = "{.fn}_Prevalence"
    )
  )

print(Pre_summary)
## # A tibble: 3 × 6
##   Parasite Count Mean_Prevalence Median_Prevalence Min_Prevalence Max_Prevalence
##   <chr>    <int>           <dbl>             <dbl>          <dbl>          <dbl>
## 1 Ascaris    989           0.102            0.0161              0          0.952
## 2 Hookwor…  1000           0.218            0.0870              0          1    
## 3 Schisto…   589           0.212            0.0656              0          1

Q2: 統計每個國家/寄生蟲組合的案例數

# Tally the records for every country/parasite pair, then spread the parasites
# into one column each. Pairs that never occur are filled with 0, and the
# final table is ordered alphabetically by country.
case_counts <- combined_data %>%
  count(Country, Parasite, name = "Study_Cases") %>%
  pivot_wider(
    names_from = Parasite,
    values_from = Study_Cases,
    values_fill = 0
  ) %>%
  arrange(Country)

print(case_counts)
## # A tibble: 19 × 4
##    Country                          Ascaris Hookworms `Schistosoma mansoni`
##    <chr>                              <int>     <int>                 <int>
##  1 Angola                                38        38                     0
##  2 Burundi                               22        22                     0
##  3 Cameroon                               1         0                     0
##  4 China                                  1         1                     0
##  5 Cote D'Ivoire                          1         2                     0
##  6 Democratic Republic of the Congo       0         0                     1
##  7 Eritrea                               40        40                     0
##  8 Ethiopia                               0         2                     6
##  9 Ghana                                 77        77                     0
## 10 Malawi                                33        33                     0
## 11 Nepal                                  0         2                     0
## 12 Nigeria                               20         4                     0
## 13 Philippines                          132       117                     0
## 14 Senegal                              106       105                     0
## 15 Sierra Leone                          52        52                    52
## 16 South Africa                           0         4                     0
## 17 Uganda                               466       499                   523
## 18 United Republic of Tanzania            0         2                     5
## 19 Zambia                                 0         0                     2

Q3: 添加新column (Total), 計算每個國家的三種寄生蟲案例總和,並降序排列

# Add a Total column (sum of the three parasite counts per country), place it
# right after Country, and list the countries from most to fewest records.
case_Total <- case_counts %>%
  mutate(Total = Ascaris + Hookworms + `Schistosoma mansoni`) %>%
  relocate(Total, .after = Country) %>%
  arrange(desc(Total))

print(case_Total)
## # A tibble: 19 × 5
##    Country                         Total Ascaris Hookworms `Schistosoma mansoni`
##    <chr>                           <int>   <int>     <int>                 <int>
##  1 Uganda                           1488     466       499                   523
##  2 Philippines                       249     132       117                     0
##  3 Senegal                           211     106       105                     0
##  4 Sierra Leone                      156      52        52                    52
##  5 Ghana                             154      77        77                     0
##  6 Eritrea                            80      40        40                     0
##  7 Angola                             76      38        38                     0
##  8 Malawi                             66      33        33                     0
##  9 Burundi                            44      22        22                     0
## 10 Nigeria                            24      20         4                     0
## 11 Ethiopia                            8       0         2                     6
## 12 United Republic of Tanzania         7       0         2                     5
## 13 South Africa                        4       0         4                     0
## 14 Cote D'Ivoire                       3       1         2                     0
## 15 China                               2       1         1                     0
## 16 Nepal                               2       0         2                     0
## 17 Zambia                              2       0         0                     2
## 18 Cameroon                            1       1         0                     0
## 19 Democratic Republic of the Con…     1       0         0                     1

Q4: 綜合案例最多的前三個國家,各寄生蟲的數據統計,以 box-plot 呈現

# Names of the three countries with the most records overall, largest first.
top_countries <- combined_data %>%
  count(Country, sort = TRUE) %>%
  slice_head(n = 3) %>%
  pull(Country)

# Keep only those countries; the factor levels follow the ranking above so the
# facets appear in that order.
data_top_countries <- combined_data %>%
  filter(Country %in% top_countries) %>%
  mutate(Country = factor(Country, levels = top_countries))

# One box-plot panel per country, one box per parasite. Each panel gets its
# own y-axis range.
plot_boxplot_country <- ggplot(
  data_top_countries,
  aes(x = Parasite, y = Prevalence, fill = Parasite)
) +
  geom_boxplot() +
  facet_wrap(vars(Country), scales = "free_y") +
  labs(
    title = "Prevalence In Top 3 Countries",
    x = "Type of Helminths",
    y = "Prevalence",
    fill = "Helminths"
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    # Tilt the x-axis labels so the parasite names do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(plot_boxplot_country)

Q5: 綜合案例最少的三個國家(但案例數需至少為 10,即 >= 10),以 box-plot 呈現

# Names of the three countries with the fewest records, considering only
# countries that have at least 10 records; smallest first.
top_countries_min_cases <- combined_data %>%
  count(Country, name = "Total_Cases") %>%
  filter(Total_Cases >= 10) %>%
  arrange(Total_Cases) %>%
  slice_head(n = 3) %>%
  pull(Country)

# Keep only those countries; the factor levels follow the ranking above so the
# facets appear in that order.
data_min_cases_countries <- combined_data %>%
  filter(Country %in% top_countries_min_cases) %>%
  mutate(Country = factor(Country, levels = top_countries_min_cases))

# One box-plot panel per country, one box per parasite. Each panel gets its
# own y-axis range.
plot_boxplot_min_cases <- ggplot(
  data_min_cases_countries,
  aes(x = Parasite, y = Prevalence, fill = Parasite)
) +
  geom_boxplot() +
  facet_wrap(vars(Country), scales = "free_y") +
  labs(
    title = "Prevalence In Last 3 Countries (>=10)",
    x = "Type of Helminths",
    y = "Prevalence",
    fill = "Helminths"
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    # Tilt the x-axis labels so the parasite names do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

print(plot_boxplot_min_cases)

Q6: 觀察各寄生蟲盛行率隨時間的變化

# Mean prevalence per start year and parasite.
# Records without a start year are dropped first: otherwise they are pooled
# into an NA-year group per parasite, which cannot be placed on the time axis
# and is discarded by ggplot2 with "Removed ... rows" warnings.
temporal_trend <- combined_data %>%
  filter(!is.na(Year_start)) %>%
  group_by(Year_start, Parasite) %>%
  summarise(
    Mean_Prevalence = mean(Prevalence),
    .groups = "drop"
  )

# Scatter plot of the yearly means, with one linear-regression trend line
# (and its confidence band) per parasite.
plot_temporal <- temporal_trend %>%
  ggplot(aes(x = Year_start, y = Mean_Prevalence, color = Parasite)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Prevalence Change by Time",
    x = "Year_Start",
    y = "Mean Prevalence",
    color = "Helminths"
  ) +
  theme_bw()


print(plot_temporal)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

Q7: 觀察各寄生蟲盛行率,在不同年齡層的變化

# Use the midpoint of each record's age range as its representative age.
# Rows whose midpoint is missing, or whose range is inverted (end before
# start), are excluded.
age_data <- combined_data %>%
  mutate(Average_Age = (Age_start + Age_end) / 2) %>%
  filter(!is.na(Average_Age), Age_start <= Age_end)

# Scatter plot of prevalence against midpoint age, with a LOESS curve per
# parasite to show non-linear trends (no confidence band).
plot_age_prevalence <- ggplot(
  age_data,
  aes(x = Average_Age, y = Prevalence, color = Parasite)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(
    title = "Relation between Age and Prevalence",
    x = "Average Age",
    y = "Prevalence",
    color = "Helminths"
  ) +
  theme_bw()


print(plot_age_prevalence)
## `geom_smooth()` using formula = 'y ~ x'