Import your data

# Load the workbook that every later step operates on
data <- read_excel(path = "myData.xlsx")

Chapter 15

Create a factor

# Number of rows per region
count(data, region)
## # A tibble: 7 × 2
##   region                         n
##   <chr>                      <int>
## 1 East Asia & Pacific          740
## 2 Europe & Central Asia       1160
## 3 Latin America & Caribbean    840
## 4 Middle East & North Africa   420
## 5 North America                 60
## 6 South Asia                   160
## 7 Sub-Saharan Africa           960
# Desired ordering of the region factor. Every region present in the data must
# be listed: factor() silently converts values missing from `levels` to NA.
# "East Asia & Pacific" (740 rows in the count above) was previously omitted;
# it is appended last so the positions of the existing levels are unchanged.
region_levels <- c(
  "Europe & Central Asia",
  "South Asia",
  "North America",
  "Latin America & Caribbean",
  "Middle East & North Africa",
  "Sub-Saharan Africa",
  "East Asia & Pacific"
)

# Convert the character column `region` to a factor with the explicit order.
data_rev <- data %>%
  mutate(region = factor(region, levels = region_levels))

Modify factor order

# Summarise: one row per region holding the mean data_use_score,
# with missing scores excluded from the mean
data_use_by_region <- summarise(
  group_by(data, region),
  avg_data_use = mean(data_use_score, na.rm = TRUE)
)

data_use_by_region
## # A tibble: 7 × 2
##   region                     avg_data_use
##   <chr>                             <dbl>
## 1 East Asia & Pacific                45.2
## 2 Europe & Central Asia              59.8
## 3 Latin America & Caribbean          46.0
## 4 Middle East & North Africa         49.3
## 5 North America                      54.8
## 6 South Asia                         55.7
## 7 Sub-Saharan Africa                 47.9
# Dot plot: average score on x, region on y (no explicit region ordering)
ggplot(data_use_by_region, aes(avg_data_use, region)) +
  geom_point()

# Same dot plot, with regions reordered along the y-axis by their average score
ggplot(
  data_use_by_region,
  aes(avg_data_use, fct_reorder(region, avg_data_use))
) +
  geom_point() +
  labs(x = "Average Data Use Score", y = NULL)

Modify factor levels

Show examples of three functions:

  • fct_recode
# Rename two individual levels (new name = old name); all others are kept as-is
data <- mutate(
  data,
  region_recode = fct_recode(
    region,
    Europe = "Europe & Central Asia",
    Asia = "East Asia & Pacific"
  )
)
  • fct_collapse
# Merge the seven regions into four broader groups (new group = old levels)
data <- mutate(
  data,
  region_grouped = fct_collapse(
    region,
    America = c("North America", "Latin America & Caribbean"),
    Europe = "Europe & Central Asia",
    Asia = c("East Asia & Pacific", "South Asia"),
    Others = c("Sub-Saharan Africa", "Middle East & North Africa")
  )
)
  • fct_lump
# Lump the least frequent regions into "Other", then count rows per level
count(
  mutate(data, region = fct_lump(region)),
  region
)
## # A tibble: 5 × 2
##   region                        n
##   <fct>                     <int>
## 1 East Asia & Pacific         740
## 2 Europe & Central Asia      1160
## 3 Latin America & Caribbean   840
## 4 Sub-Saharan Africa          960
## 5 Other                       640

Chapter 16

No need to do anything here.