library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr     1.1.4     v readr     2.1.6
## v forcats   1.0.1     v stringr   1.6.0
## v ggplot2   4.0.1     v tibble    3.3.1
## v lubridate 1.9.4     v tidyr     1.3.2
## v purrr     1.2.1     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## 载入需要的程序包:airports
## 载入需要的程序包:cherryblossom
## 载入需要的程序包:usdata
# Reshape the WHO tuberculosis table from wide to long: each count column from
# new_sp_m014 through newrel_f65 becomes a (key, cases) pair, and pairs whose
# count is missing are dropped.
who1 <- pivot_longer(
  who,
  cols = new_sp_m014:newrel_f65,
  names_to = "key",
  values_to = "cases",
  values_drop_na = TRUE
)
who1
## # A tibble: 76,046 x 6
##    country     iso2  iso3   year key          cases
##    <chr>       <chr> <chr> <dbl> <chr>        <dbl>
##  1 Afghanistan AF    AFG    1997 new_sp_m014      0
##  2 Afghanistan AF    AFG    1997 new_sp_m1524    10
##  3 Afghanistan AF    AFG    1997 new_sp_m2534     6
##  4 Afghanistan AF    AFG    1997 new_sp_m3544     3
##  5 Afghanistan AF    AFG    1997 new_sp_m4554     5
##  6 Afghanistan AF    AFG    1997 new_sp_m5564     2
##  7 Afghanistan AF    AFG    1997 new_sp_m65       0
##  8 Afghanistan AF    AFG    1997 new_sp_f014      5
##  9 Afghanistan AF    AFG    1997 new_sp_f1524    38
## 10 Afghanistan AF    AFG    1997 new_sp_f2534    36
## # i 76,036 more rows
# Rewrite the "newrel" prefix as "new_rel" so these keys are delimited by
# underscores in the same way as the other keys.
who2 <- mutate(who1, key = str_replace(key, "newrel", "new_rel"))
who2
## # A tibble: 76,046 x 6
##    country     iso2  iso3   year key          cases
##    <chr>       <chr> <chr> <dbl> <chr>        <dbl>
##  1 Afghanistan AF    AFG    1997 new_sp_m014      0
##  2 Afghanistan AF    AFG    1997 new_sp_m1524    10
##  3 Afghanistan AF    AFG    1997 new_sp_m2534     6
##  4 Afghanistan AF    AFG    1997 new_sp_m3544     3
##  5 Afghanistan AF    AFG    1997 new_sp_m4554     5
##  6 Afghanistan AF    AFG    1997 new_sp_m5564     2
##  7 Afghanistan AF    AFG    1997 new_sp_m65       0
##  8 Afghanistan AF    AFG    1997 new_sp_f014      5
##  9 Afghanistan AF    AFG    1997 new_sp_f1524    38
## 10 Afghanistan AF    AFG    1997 new_sp_f2534    36
## # i 76,036 more rows
# Split each key (e.g. "new_sp_m014") on "_" into its three components.
# separate_wider_delim() replaces the superseded separate(); unlike separate(),
# it raises an error if a key does not have exactly three pieces instead of
# warning and padding with NA.
who3 <- who2 %>%
  separate_wider_delim(
    key,
    delim = "_",
    names = c("new", "type", "sexage")
  )
who3
## # A tibble: 76,046 x 8
##    country     iso2  iso3   year new   type  sexage cases
##    <chr>       <chr> <chr> <dbl> <chr> <chr> <chr>  <dbl>
##  1 Afghanistan AF    AFG    1997 new   sp    m014       0
##  2 Afghanistan AF    AFG    1997 new   sp    m1524     10
##  3 Afghanistan AF    AFG    1997 new   sp    m2534      6
##  4 Afghanistan AF    AFG    1997 new   sp    m3544      3
##  5 Afghanistan AF    AFG    1997 new   sp    m4554      5
##  6 Afghanistan AF    AFG    1997 new   sp    m5564      2
##  7 Afghanistan AF    AFG    1997 new   sp    m65        0
##  8 Afghanistan AF    AFG    1997 new   sp    f014       5
##  9 Afghanistan AF    AFG    1997 new   sp    f1524     38
## 10 Afghanistan AF    AFG    1997 new   sp    f2534     36
## # i 76,036 more rows
# Drop the constant "new" marker and the ISO country codes; only the country
# name is kept as the country identifier.
who4 <- who3 %>%
  select(-new, -iso2, -iso3)
# Split "sexage" (e.g. "m1524") into a one-letter sex code and the age code that
# follows it. separate_wider_regex() replaces the superseded
# separate(sep = 1) and errors on any value that is not a single m/f followed
# by digits, rather than silently producing a malformed split.
who5 <- who4 %>%
  separate_wider_regex(
    sexage,
    patterns = c(sex = "[mf]", age = "[0-9]+")
  )
who5
## # A tibble: 76,046 x 6
##    country      year type  sex   age   cases
##    <chr>       <dbl> <chr> <chr> <chr> <dbl>
##  1 Afghanistan  1997 sp    m     014       0
##  2 Afghanistan  1997 sp    m     1524     10
##  3 Afghanistan  1997 sp    m     2534      6
##  4 Afghanistan  1997 sp    m     3544      3
##  5 Afghanistan  1997 sp    m     4554      5
##  6 Afghanistan  1997 sp    m     5564      2
##  7 Afghanistan  1997 sp    m     65        0
##  8 Afghanistan  1997 sp    f     014       5
##  9 Afghanistan  1997 sp    f     1524     38
## 10 Afghanistan  1997 sp    f     2534     36
## # i 76,036 more rows
# Dodged bar chart of total 1997 TB cases by sex, one bar per age group.
# stat_summary() sums `cases` within each sex/age combination.
who5 %>%
  filter(year == 1997) %>%
  ggplot() +
  stat_summary(
    aes(x = sex, y = cases, fill = age),
    fun = "sum",
    geom = "bar",
    position = "dodge"
  ) +
  labs(
    title = "TB cases in the world in 1997",
    x = "Gender",
    y = "Case Counts"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5,
      size = rel(1.2),
      margin = margin(0, 0, 15, 0)
    ),
    axis.title.x = element_text(
      size = rel(1.0),
      margin = margin(10, 0, 0, 0)
    ),
    axis.title.y = element_text(
      size = rel(1.0),
      margin = margin(0, 10, 0, 0)
    ),
    axis.text = element_text(size = rel(1.0)),
    plot.margin = margin(1, 1, 1, 1, "cm")
  ) +
  # Labels are keyed by the raw age code rather than matched by position, so a
  # missing or reordered age level cannot shift the labels onto the wrong bars.
  # The open-ended top group is labelled "65+" because it includes age 65.
  scale_fill_discrete(
    labels = c(
      "014" = "0-14 yrs",
      "1524" = "15-24 yrs",
      "2534" = "25-34 yrs",
      "3544" = "35-44 yrs",
      "4554" = "45-54 yrs",
      "5564" = "55-64 yrs",
      "65" = "65+ yrs"
    )
  )

# Total case count per year across all countries, types, sexes and age groups,
# ordered by year.
tb_year <- who5 %>%
  summarise(total_cases = sum(cases, na.rm = TRUE), .by = year) %>%
  arrange(year)
# Yearly totals drawn as a line with a point marking each year.
tb_year %>%
  ggplot(aes(x = year, y = total_cases)) +
  geom_line() +
  geom_point()

# Rows of the long table that belong to the year 2010.
tb2010 <- filter(who5, year == 2010)
tb2010
## # A tibble: 7,169 x 6
##    country      year type  sex   age   cases
##    <chr>       <dbl> <chr> <chr> <chr> <dbl>
##  1 Afghanistan  2010 sp    m     014     197
##  2 Afghanistan  2010 sp    m     1524    986
##  3 Afghanistan  2010 sp    m     2534    819
##  4 Afghanistan  2010 sp    m     3544    491
##  5 Afghanistan  2010 sp    m     4554    490
##  6 Afghanistan  2010 sp    m     5564    641
##  7 Afghanistan  2010 sp    m     65      622
##  8 Afghanistan  2010 sp    f     014     445
##  9 Afghanistan  2010 sp    f     1524   2107
## 10 Afghanistan  2010 sp    f     2534   2263
## # i 7,159 more rows
# Total 2010 cases for every country/sex pair; a weighted count sums `cases`
# within each pair and returns an ungrouped table.
tb2010_sex <- tb2010 %>%
  count(country, sex, wt = cases, name = "total_cases")
tb2010_sex
## # A tibble: 407 x 3
##    country        sex   total_cases
##    <chr>          <chr>       <dbl>
##  1 Afghanistan    f            8701
##  2 Afghanistan    m            4246
##  3 Albania        f             124
##  4 Albania        m             291
##  5 Algeria        f            3248
##  6 Algeria        m            5051
##  7 American Samoa f               1
##  8 American Samoa m               3
##  9 Andorra        f               3
## 10 Andorra        m               4
## # i 397 more rows
# One row per country with the female (f) and male (m) totals side by side,
# plus the male-to-female ratio. A country with zero female cases gets a ratio
# of Inf.
tb_ratio <- tb2010_sex %>%
  pivot_wider(names_from = sex, values_from = total_cases) %>%
  mutate(ratio = m / f)
# Show the countries with the largest male-to-female ratio first.
arrange(tb_ratio, desc(ratio))
## # A tibble: 204 x 4
##    country                    f     m  ratio
##    <chr>                  <dbl> <dbl>  <dbl>
##  1 Anguilla                   0     1 Inf   
##  2 Bermuda                    0     1 Inf   
##  3 British Virgin Islands     0     1 Inf   
##  4 Antigua and Barbuda        1     5   5   
##  5 Seychelles                 3    14   4.67
##  6 Qatar                    105   475   4.52
##  7 Cuba                     184   588   3.20
##  8 Jamaica                   31    97   3.13
##  9 Trinidad and Tobago       53   161   3.04
## 10 American Samoa             1     3   3   
## # i 194 more rows

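# Answer: Anguilla, Bermuda and the British Virgin Islands have the highest
# male-to-female ratio in 2010 (Inf, because each recorded zero female cases).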