Q1:
針對各寄生蟲(彙整所有國家)的盛行率作簡單統計,包括樣本數,平均數,中位數,最小值,最大值
# Label every record with the parasite it belongs to, stack the three source
# tables into one, and discard records that have no prevalence value.
combined_data <- bind_rows(
  mutate(data_schisto, Parasite = "Schistosoma mansoni"),
  mutate(data_hookworm, Parasite = "Hookworms"),
  mutate(data_ascaris, Parasite = "Ascaris")
) %>%
  filter(!is.na(Prevalence))

# Per-parasite descriptive statistics of prevalence: number of records, mean,
# median, minimum and maximum. NA prevalence was already removed above, so the
# aggregates do not need na.rm.
Pre_summary <- summarise(
  group_by(combined_data, Parasite),
  Count = n(),
  Mean_Prevalence = mean(Prevalence),
  Median_Prevalence = median(Prevalence),
  Min_Prevalence = min(Prevalence),
  Max_Prevalence = max(Prevalence)
)

print(Pre_summary)
## # A tibble: 3 × 6
## Parasite Count Mean_Prevalence Median_Prevalence Min_Prevalence Max_Prevalence
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Ascaris 989 0.102 0.0161 0 0.952
## 2 Hookwor… 1000 0.218 0.0870 0 1
## 3 Schisto… 589 0.212 0.0656 0 1
Q2: 統計每個國家/寄生蟲組合的案例數
# Number of records for every Country/Parasite pair, reshaped so that each
# country is one row and each parasite is one column.
case_counts <- combined_data %>%
  count(Country, Parasite, name = "Study_Cases") %>%
  pivot_wider(
    names_from = Parasite,
    values_from = Study_Cases,
    values_fill = 0 # a country with no records for a parasite gets 0
  ) %>%
  arrange(Country) # alphabetical by country

print(case_counts)
## # A tibble: 19 × 4
## Country Ascaris Hookworms `Schistosoma mansoni`
## <chr> <int> <int> <int>
## 1 Angola 38 38 0
## 2 Burundi 22 22 0
## 3 Cameroon 1 0 0
## 4 China 1 1 0
## 5 Cote D'Ivoire 1 2 0
## 6 Democratic Republic of the Congo 0 0 1
## 7 Eritrea 40 40 0
## 8 Ethiopia 0 2 6
## 9 Ghana 77 77 0
## 10 Malawi 33 33 0
## 11 Nepal 0 2 0
## 12 Nigeria 20 4 0
## 13 Philippines 132 117 0
## 14 Senegal 106 105 0
## 15 Sierra Leone 52 52 52
## 16 South Africa 0 4 0
## 17 Uganda 466 499 523
## 18 United Republic of Tanzania 0 2 5
## 19 Zambia 0 0 2
Q3: 添加新column (Total),
計算每個國家的三種寄生蟲案例總和,並降序排列
# Add the per-country total over the three parasite columns, place it right
# after Country, and list the countries from largest to smallest total.
case_Total <- case_counts %>%
  mutate(Total = Ascaris + Hookworms + `Schistosoma mansoni`) %>%
  relocate(Total, .after = Country) %>%
  arrange(desc(Total))

print(case_Total)
## # A tibble: 19 × 5
## Country Total Ascaris Hookworms `Schistosoma mansoni`
## <chr> <int> <int> <int> <int>
## 1 Uganda 1488 466 499 523
## 2 Philippines 249 132 117 0
## 3 Senegal 211 106 105 0
## 4 Sierra Leone 156 52 52 52
## 5 Ghana 154 77 77 0
## 6 Eritrea 80 40 40 0
## 7 Angola 76 38 38 0
## 8 Malawi 66 33 33 0
## 9 Burundi 44 22 22 0
## 10 Nigeria 24 20 4 0
## 11 Ethiopia 8 0 2 6
## 12 United Republic of Tanzania 7 0 2 5
## 13 South Africa 4 0 4 0
## 14 Cote D'Ivoire 3 1 2 0
## 15 China 2 1 1 0
## 16 Nepal 2 0 2 0
## 17 Zambia 2 0 0 2
## 18 Cameroon 1 1 0 0
## 19 Democratic Republic of the Con… 1 0 0 1
Q4: 綜合案例最多的前三個國家,各寄生蟲的數據統計,以 box-plot 呈現
# Names of the three countries with the most records across all parasites,
# ordered from most to fewest records.
top_countries <- combined_data %>%
  count(Country, sort = TRUE) %>%
  slice_head(n = 3) %>%
  pull(Country)

# Keep only those countries; the factor levels follow the ranking above so the
# facets appear in that order.
data_top_countries <- combined_data %>%
  filter(Country %in% top_countries) %>%
  mutate(Country = factor(Country, levels = top_countries))

# Box plot of prevalence per parasite, one panel per country. Each panel has
# its own y-axis range (scales = "free_y").
plot_boxplot_country <- ggplot(
  data_top_countries,
  aes(x = Parasite, y = Prevalence, fill = Parasite)
) +
  geom_boxplot() +
  facet_wrap(vars(Country), scales = "free_y") +
  labs(
    title = "Prevalence In Top 3 Countries",
    x = "Type of Helminths",
    y = "Prevalence",
    fill = "Helminths"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # slanted x labels
    legend.position = "bottom"
  )

print(plot_boxplot_country)

Q5: 綜合案例最少的三個國家(但案例數需至少為 10,即 >= 10),以 box-plot 呈現
# Names of the three countries with the fewest records, considering only
# countries that have at least 10 records; ordered from fewest upwards.
top_countries_min_cases <- combined_data %>%
  count(Country, name = "Total_Cases") %>%
  filter(Total_Cases >= 10) %>%
  arrange(Total_Cases) %>%
  slice_head(n = 3) %>%
  pull(Country)

# Keep only those countries; the factor levels follow the ranking above so the
# facets appear in that order.
data_min_cases_countries <- combined_data %>%
  filter(Country %in% top_countries_min_cases) %>%
  mutate(Country = factor(Country, levels = top_countries_min_cases))

# Box plot of prevalence per parasite, one panel per country. Each panel has
# its own y-axis range (scales = "free_y").
plot_boxplot_min_cases <- ggplot(
  data_min_cases_countries,
  aes(x = Parasite, y = Prevalence, fill = Parasite)
) +
  geom_boxplot() +
  facet_wrap(vars(Country), scales = "free_y") +
  labs(
    title = "Prevalence In Last 3 Countries (>=10)",
    x = "Type of Helminths",
    y = "Prevalence",
    fill = "Helminths"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # slanted x labels
    legend.position = "bottom"
  )

print(plot_boxplot_min_cases)

Q6:觀察各寄生蟲盛行率隨時間的變化
# Mean prevalence per start year and parasite.
# Records without a start year are excluded up front: otherwise they form an
# NA-year group that cannot be placed on the x-axis, and ggplot2 drops it with
# "Removed rows containing non-finite / missing values" warnings.
temporal_trend <- combined_data %>%
  filter(!is.na(Year_start)) %>%
  group_by(Year_start, Parasite) %>%
  summarise(
    Mean_Prevalence = mean(Prevalence),
    .groups = "drop"
  )

# Scatter plot of the yearly means with one linear-regression trend line (and
# its confidence band) per parasite.
plot_temporal <- temporal_trend %>%
  ggplot(aes(x = Year_start, y = Mean_Prevalence, color = Parasite)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) + # linear trend per parasite
  labs(
    title = "Prevalence Change by Time",
    x = "Year_Start",
    y = "Mean Prevalence",
    color = "Helminths"
  ) +
  theme_bw()

print(plot_temporal)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

Q7: 觀察各寄生蟲盛行率,在不同年齡層的變化
# Midpoint of each record's age range. Records whose age range is reversed
# (end before start) or whose midpoint cannot be computed are left out.
age_data <- combined_data %>%
  mutate(Average_Age = (Age_start + Age_end) / 2) %>%
  filter(Age_end >= Age_start & !is.na(Average_Age))

# Scatter plot of prevalence against the age midpoint, with one LOESS curve
# per parasite to show a possibly non-linear trend (no confidence band).
plot_age_prevalence <- ggplot(
  age_data,
  aes(x = Average_Age, y = Prevalence, color = Parasite)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(
    title = "Relation between Age and Prevalence",
    x = "Average Age",
    y = "Prevalence",
    color = "Helminths"
  ) +
  theme_bw()

print(plot_age_prevalence)
## `geom_smooth()` using formula = 'y ~ x'
