library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.4 v readr 2.1.6
## v forcats 1.0.1 v stringr 1.6.0
## v ggplot2 4.0.1 v tibble 3.3.1
## v lubridate 1.9.4 v tidyr 1.3.2
## v purrr 1.2.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## 载入需要的程序包:airports
## 载入需要的程序包:cherryblossom
## 载入需要的程序包:usdata
# Reshape the `who` table from wide to long: every column other than the four
# identifier columns is stacked into a (key, cases) pair, and rows whose case
# count is missing are dropped.
who1 <- pivot_longer(
  who,
  cols = !c(country, iso2, iso3, year),
  names_to = "key",
  values_to = "cases",
  values_drop_na = TRUE
)
who1
## # A tibble: 76,046 x 6
## country iso2 iso3 year key cases
## <chr> <chr> <chr> <dbl> <chr> <dbl>
## 1 Afghanistan AF AFG 1997 new_sp_m014 0
## 2 Afghanistan AF AFG 1997 new_sp_m1524 10
## 3 Afghanistan AF AFG 1997 new_sp_m2534 6
## 4 Afghanistan AF AFG 1997 new_sp_m3544 3
## 5 Afghanistan AF AFG 1997 new_sp_m4554 5
## 6 Afghanistan AF AFG 1997 new_sp_m5564 2
## 7 Afghanistan AF AFG 1997 new_sp_m65 0
## 8 Afghanistan AF AFG 1997 new_sp_f014 5
## 9 Afghanistan AF AFG 1997 new_sp_f1524 38
## 10 Afghanistan AF AFG 1997 new_sp_f2534 36
## # i 76,036 more rows
# Rewrite the "newrel" prefix as "new_rel" so these keys use the same
# underscore-delimited layout as the others (e.g. "new_sp_m014").
who2 <- mutate(who1, key = str_replace(key, "newrel", "new_rel"))
who2
## # A tibble: 76,046 x 6
## country iso2 iso3 year key cases
## <chr> <chr> <chr> <dbl> <chr> <dbl>
## 1 Afghanistan AF AFG 1997 new_sp_m014 0
## 2 Afghanistan AF AFG 1997 new_sp_m1524 10
## 3 Afghanistan AF AFG 1997 new_sp_m2534 6
## 4 Afghanistan AF AFG 1997 new_sp_m3544 3
## 5 Afghanistan AF AFG 1997 new_sp_m4554 5
## 6 Afghanistan AF AFG 1997 new_sp_m5564 2
## 7 Afghanistan AF AFG 1997 new_sp_m65 0
## 8 Afghanistan AF AFG 1997 new_sp_f014 5
## 9 Afghanistan AF AFG 1997 new_sp_f1524 38
## 10 Afghanistan AF AFG 1997 new_sp_f2534 36
## # i 76,036 more rows
# Split each key into its three underscore-delimited parts
# ("new", case type, combined sex/age code).
# separate_wider_delim() replaces the superseded separate(); unlike
# separate(), it raises an error instead of a warning when a key does not
# contain exactly three parts, so malformed keys cannot slip through.
who3 <- who2 %>%
  separate_wider_delim(
    key,
    delim = "_",
    names = c("new", "type", "sexage")
  )
who3
## # A tibble: 76,046 x 8
## country iso2 iso3 year new type sexage cases
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 Afghanistan AF AFG 1997 new sp m014 0
## 2 Afghanistan AF AFG 1997 new sp m1524 10
## 3 Afghanistan AF AFG 1997 new sp m2534 6
## 4 Afghanistan AF AFG 1997 new sp m3544 3
## 5 Afghanistan AF AFG 1997 new sp m4554 5
## 6 Afghanistan AF AFG 1997 new sp m5564 2
## 7 Afghanistan AF AFG 1997 new sp m65 0
## 8 Afghanistan AF AFG 1997 new sp f014 5
## 9 Afghanistan AF AFG 1997 new sp f1524 38
## 10 Afghanistan AF AFG 1997 new sp f2534 36
## # i 76,036 more rows
# Drop the `new`, `iso2` and `iso3` columns.
who4 <- select(who3, !c(new, iso2, iso3))

# Split the combined code after its first character: the leading letter
# becomes `sex`, the remaining characters become `age`.
who5 <- separate(who4, col = sexage, into = c("sex", "age"), sep = 1)
who5
## # A tibble: 76,046 x 6
## country year type sex age cases
## <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 Afghanistan 1997 sp m 014 0
## 2 Afghanistan 1997 sp m 1524 10
## 3 Afghanistan 1997 sp m 2534 6
## 4 Afghanistan 1997 sp m 3544 3
## 5 Afghanistan 1997 sp m 4554 5
## 6 Afghanistan 1997 sp m 5564 2
## 7 Afghanistan 1997 sp m 65 0
## 8 Afghanistan 1997 sp f 014 5
## 9 Afghanistan 1997 sp f 1524 38
## 10 Afghanistan 1997 sp f 2534 36
## # i 76,036 more rows
# Dodged bar chart of the 1997 case counts: one group of bars per sex, one
# bar per age band. stat_summary(fun = "sum") adds up `cases` over all rows
# that fall into each sex/age combination.
who5 %>%
  filter(year == 1997) %>%
  ggplot() +
  stat_summary(
    aes(x = sex, y = cases, fill = age),
    fun = "sum",
    geom = "bar",
    position = "dodge"
  ) +
  labs(
    title = "TB cases in the world in 1997",
    x = "Gender",
    y = "Case Counts"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5,
      size = rel(1.2),
      margin = margin(0, 0, 15, 0)
    ),
    axis.title.x = element_text(size = rel(1.0), margin = margin(10, 0, 0, 0)),
    axis.title.y = element_text(size = rel(1.0), margin = margin(0, 10, 0, 0)),
    axis.text = element_text(size = rel(1.0)),
    plot.margin = margin(1, 1, 1, 1, "cm")
  ) +
  # Labels are keyed by age code rather than given positionally, so each code
  # keeps its own label even if a level is missing from the filtered data.
  # The "65" band covers ages 65 and older, hence "65+" rather than "> 65".
  scale_fill_discrete(
    labels = c(
      "014" = "0-14 yrs",
      "1524" = "15-24 yrs",
      "2534" = "25-34 yrs",
      "3544" = "35-44 yrs",
      "4554" = "45-54 yrs",
      "5564" = "55-64 yrs",
      "65" = "65+ yrs"
    )
  )
# Total cases per year: count() with `wt` sums `cases` within each year
# (ignoring missing values) and stores the result as `total_cases`.
tb_year <- count(who5, year, wt = cases, name = "total_cases")

# Yearly totals drawn as a line with a point marker on every year.
tb_year %>%
  ggplot(aes(x = year, y = total_cases)) +
  geom_line() +
  geom_point()
# Keep only the records for the year 2010.
tb2010 <- filter(who5, year == 2010)
tb2010
## # A tibble: 7,169 x 6
## country year type sex age cases
## <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 Afghanistan 2010 sp m 014 197
## 2 Afghanistan 2010 sp m 1524 986
## 3 Afghanistan 2010 sp m 2534 819
## 4 Afghanistan 2010 sp m 3544 491
## 5 Afghanistan 2010 sp m 4554 490
## 6 Afghanistan 2010 sp m 5564 641
## 7 Afghanistan 2010 sp m 65 622
## 8 Afghanistan 2010 sp f 014 445
## 9 Afghanistan 2010 sp f 1524 2107
## 10 Afghanistan 2010 sp f 2534 2263
## # i 7,159 more rows
# 2010 case totals for every country/sex pair: count() with `wt` sums `cases`
# (ignoring missing values) per pair and returns an ungrouped tibble.
tb2010_sex <- count(tb2010, country, sex, wt = cases, name = "total_cases")
tb2010_sex
## # A tibble: 407 x 3
## country sex total_cases
## <chr> <chr> <dbl>
## 1 Afghanistan f 8701
## 2 Afghanistan m 4246
## 3 Albania f 124
## 4 Albania m 291
## 5 Algeria f 3248
## 6 Algeria m 5051
## 7 American Samoa f 1
## 8 American Samoa m 3
## 9 Andorra f 3
## 10 Andorra m 4
## # i 397 more rows
# Put the female and male totals side by side (columns `f` and `m`) and
# compute the male-to-female case ratio for each country.
# Note: a country with f == 0 and m > 0 gets ratio = Inf, which sorts to the
# top of the descending ranking printed below.
tb_ratio <- tb2010_sex %>%
  pivot_wider(names_from = sex, values_from = total_cases) %>%
  mutate(ratio = m / f)

arrange(tb_ratio, desc(ratio))
## # A tibble: 204 x 4
## country f m ratio
## <chr> <dbl> <dbl> <dbl>
## 1 Anguilla 0 1 Inf
## 2 Bermuda 0 1 Inf
## 3 British Virgin Islands 0 1 Inf
## 4 Antigua and Barbuda 1 5 5
## 5 Seychelles 3 14 4.67
## 6 Qatar 105 475 4.52
## 7 Cuba 184 588 3.20
## 8 Jamaica 31 97 3.13
## 9 Trinidad and Tobago 53 161 3.04
## 10 American Samoa 1 3 3
## # i 194 more rows
# Top of the ranking (ratio = Inf because f = 0): Anguilla, Bermuda,
# British Virgin Islands.