Import your data

# Load the life-expectancy workbook (the path is relative to this document)
# and print the tibble to check its columns and types.
data <- read_excel(path = "../00_data/newmyData.xlsx")
data
## # A tibble: 20,755 × 4
##    Entity      Code   Year LifeExpectancy
##    <chr>       <chr> <dbl>          <dbl>
##  1 Afghanistan AFG    1950           27.7
##  2 Afghanistan AFG    1951           28.0
##  3 Afghanistan AFG    1952           28.4
##  4 Afghanistan AFG    1953           28.9
##  5 Afghanistan AFG    1954           29.2
##  6 Afghanistan AFG    1955           29.9
##  7 Afghanistan AFG    1956           30.4
##  8 Afghanistan AFG    1957           30.9
##  9 Afghanistan AFG    1958           31.5
## 10 Afghanistan AFG    1959           32.0
## # ℹ 20,745 more rows

Chapter 15

Create a factor

# Tally how many rows exist for each country code.
count(data, Code)
## # A tibble: 239 × 2
##    Code      n
##    <chr> <int>
##  1 ABW      72
##  2 AFG      72
##  3 AGO      73
##  4 AIA      72
##  5 ALB      72
##  6 AND      72
##  7 ARE      72
##  8 ARG      87
##  9 ARM      72
## 10 ASM      72
## # ℹ 229 more rows
# The four country codes to keep as factor levels, in this explicit order.
code_levels <- c("ABW", "AFG", "ARG", "ALB")

# Convert Code from character to factor. Any code that is not listed in
# code_levels becomes NA in the resulting factor column.
data_rev <- mutate(data, Code = factor(Code, levels = code_levels))

data_rev
## # A tibble: 20,755 × 4
##    Entity      Code   Year LifeExpectancy
##    <chr>       <fct> <dbl>          <dbl>
##  1 Afghanistan AFG    1950           27.7
##  2 Afghanistan AFG    1951           28.0
##  3 Afghanistan AFG    1952           28.4
##  4 Afghanistan AFG    1953           28.9
##  5 Afghanistan AFG    1954           29.2
##  6 Afghanistan AFG    1955           29.9
##  7 Afghanistan AFG    1956           30.4
##  8 Afghanistan AFG    1957           30.9
##  9 Afghanistan AFG    1958           31.5
## 10 Afghanistan AFG    1959           32.0
## # ℹ 20,745 more rows
# Keep only the rows that belong to the selected country codes.
# Reusing code_levels (instead of retyping the four codes) keeps this filter
# in sync with the factor definition above. Rows whose Code turned into NA
# during the factor conversion are dropped, because `%in%` yields FALSE for NA.
data_rev2 <- data_rev %>%
  filter(Code %in% code_levels)
data_rev2
## # A tibble: 303 × 4
##    Entity      Code   Year LifeExpectancy
##    <chr>       <fct> <dbl>          <dbl>
##  1 Afghanistan AFG    1950           27.7
##  2 Afghanistan AFG    1951           28.0
##  3 Afghanistan AFG    1952           28.4
##  4 Afghanistan AFG    1953           28.9
##  5 Afghanistan AFG    1954           29.2
##  6 Afghanistan AFG    1955           29.9
##  7 Afghanistan AFG    1956           30.4
##  8 Afghanistan AFG    1957           30.9
##  9 Afghanistan AFG    1958           31.5
## 10 Afghanistan AFG    1959           32.0
## # ℹ 293 more rows

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean life expectancy per country code over all available years, ignoring
# missing values. The result has one row per Code and carries no grouping.
data_summary <- data_rev2 %>%
  group_by(Code) %>%
  summarise(
    LifeExpectancy = mean(LifeExpectancy, na.rm = TRUE),
    .groups = "drop"
  )

data_summary
## # A tibble: 4 × 2
##   Code  LifeExpectancy
##   <fct>          <dbl>
## 1 ABW             70.3
## 2 AFG             45.4
## 3 ARG             65.4
## 4 ALB             68.3
# Bar chart BEFORE ordering: bars follow the factor's level order
# (ABW, AFG, ARG, ALB), which is unrelated to the plotted values.
ggplot(data_summary, aes(x = Code, y = LifeExpectancy)) +
  geom_col()

# Bar chart AFTER ordering: fct_reorder() sorts the Code levels by
# LifeExpectancy, so the bars rise from the lowest to the highest mean.
# labs() restores a readable axis title in place of the raw expression.
ggplot(
  data_summary,
  aes(x = fct_reorder(Code, LifeExpectancy), y = LifeExpectancy)
) +
  geom_col() +
  labs(x = "Code")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename individual levels using "new label" = "old level"
# pairs. Codes that are not mentioned keep their original label.
data %>%
  mutate(
    Code = fct_recode(
      Code,
      "Albanian" = "ALB",
      "Aruban" = "ABW",
      "Argentinian" = "ARG",
      "Afghani" = "AFG"
    )
  ) %>%
  count(Code)
## # A tibble: 239 × 2
##    Code            n
##    <fct>       <int>
##  1 Aruban         72
##  2 Afghani        72
##  3 AGO            73
##  4 AIA            72
##  5 Albanian       72
##  6 AND            72
##  7 ARE            72
##  8 Argentinian    87
##  9 ARM            72
## 10 ASM            72
## # ℹ 229 more rows
  • fct_collapse
# fct_collapse(): merge one or more old levels into a single new level.
# ALB becomes "European"; ABW, AFG and ARG are pooled into "Other".
# Every other code is left as it is.
data %>%
  mutate(
    Code = fct_collapse(
      Code,
      European = "ALB",
      Other = c("ABW", "AFG", "ARG")
    )
  ) %>%
  count(Code)
## # A tibble: 237 × 2
##    Code         n
##    <fct>    <int>
##  1 Other      231
##  2 AGO         73
##  3 AIA         72
##  4 European    72
##  5 AND         72
##  6 ARE         72
##  7 ARM         72
##  8 ASM         72
##  9 ATG         72
## 10 AUS        104
## # ℹ 227 more rows
  • fct_lump
# fct_lump(): pool the least frequent levels into a single catch-all level.
# NOTE(review): called without `n` or `prop`, the count printed below still
# lists all 239 codes, i.e. nothing was lumped for this data. Pass an
# explicit threshold if the lumping effect is meant to be visible.
count(
  mutate(data, Code = fct_lump(Code)),
  Code
)
## # A tibble: 239 × 2
##    Code      n
##    <fct> <int>
##  1 ABW      72
##  2 AFG      72
##  3 AGO      73
##  4 AIA      72
##  5 ALB      72
##  6 AND      72
##  7 ARE      72
##  8 ARG      87
##  9 ARM      72
## 10 ASM      72
## # ℹ 229 more rows

Chapter 16

No need to do anything here.