Import your data

# Load the workbook into `data`; every later step reads from this object.
data <- read_excel(path = "myData.xlsx")

Chapter 15

Create a factor

# Tally the rows per region to see which values the column holds.
count(data, region)
## # A tibble: 7 × 2
##   region                         n
##   <chr>                      <int>
## 1 East Asia & Pacific          740
## 2 Europe & Central Asia       1160
## 3 Latin America & Caribbean    840
## 4 Middle East & North Africa   420
## 5 North America                 60
## 6 South Asia                   160
## 7 Sub-Saharan Africa           960
# Explicit level order for the region factor. Every value that occurs in the
# `region` column must be listed here: factor() converts any value missing
# from `levels` to NA. "East Asia & Pacific" is appended last so that the
# positions of the six levels that were already listed stay unchanged.
region_levels <- c(
  "Europe & Central Asia",
  "South Asia",
  "North America",
  "Latin America & Caribbean",
  "Middle East & North Africa",
  "Sub-Saharan Africa",
  "East Asia & Pacific"
)

# Store region as a factor whose levels follow the order in region_levels.
data_rev <- mutate(data, region = factor(region, levels = region_levels))

Modify factor order

# Transform data: calculate average data_use_score by region

# One row per region holding the mean data_use_score; missing scores are
# dropped before averaging (na.rm = TRUE).
data_use_by_region <- summarise(
  group_by(data, region),
  avg_data_use = mean(data_use_score, na.rm = TRUE)
)

# Print the summary table.
data_use_by_region
## # A tibble: 7 × 2
##   region                     avg_data_use
##   <chr>                             <dbl>
## 1 East Asia & Pacific                45.2
## 2 Europe & Central Asia              59.8
## 3 Latin America & Caribbean          46.0
## 4 Middle East & North Africa         49.3
## 5 North America                      54.8
## 6 South Asia                         55.7
## 7 Sub-Saharan Africa                 47.9
# Plot
# Dot plot of the regional averages, with region used as-is (not reordered).
ggplot(data_use_by_region, aes(x = avg_data_use, y = region)) +
  geom_point()

# Dot plot of the regional averages, with the regions on the y axis reordered
# by avg_data_use via fct_reorder(); the y-axis title is removed.
ggplot(
  data_use_by_region,
  aes(x = avg_data_use, y = fct_reorder(.f = region, .x = avg_data_use))
) +
  geom_point() +
  labs(x = "Average Data Use Score", y = NULL)

Modify factor levels

Show examples of three functions:

  • fct_recode
# Rename two regions by hand (new label = old label); all other region
# labels are left untouched.
data <- mutate(
  data,
  region_recode = fct_recode(
    region,
    Europe = "Europe & Central Asia",
    Asia = "East Asia & Pacific"
  )
)

# Tally the recoded labels, most frequent first.
count(data, region_recode, sort = TRUE)
## # A tibble: 7 × 2
##   region_recode                  n
##   <fct>                      <int>
## 1 Europe                      1160
## 2 Sub-Saharan Africa           960
## 3 Latin America & Caribbean    840
## 4 Asia                         740
## 5 Middle East & North Africa   420
## 6 South Asia                   160
## 7 North America                 60
  • fct_collapse
# Merge the seven regions into four broader groups (new label = old labels).
data <- mutate(
  data,
  region_grouped = fct_collapse(
    region,
    America = c("North America", "Latin America & Caribbean"),
    Europe = "Europe & Central Asia",
    Asia = c("East Asia & Pacific", "South Asia"),
    Others = c("Sub-Saharan Africa", "Middle East & North Africa")
  )
)

# Tally the grouped labels, most frequent first.
count(data, region_grouped, sort = TRUE)
## # A tibble: 4 × 2
##   region_grouped     n
##   <fct>          <int>
## 1 Others          1380
## 2 Europe          1160
## 3 Asia             900
## 4 America          900
  • fct_lump
# Lump every region that accounts for less than 5% of the rows into "Other",
# then tally the result, most frequent first. The lumped factor only exists
# inside this pipeline; `data` itself is not modified.
data %>%
  mutate(region = fct_lump(region, prop = 0.05)) %>%
  count(region, sort = TRUE)
## # A tibble: 6 × 2
##   region                         n
##   <fct>                      <int>
## 1 Europe & Central Asia       1160
## 2 Sub-Saharan Africa           960
## 3 Latin America & Caribbean    840
## 4 East Asia & Pacific          740
## 5 Middle East & North Africa   420
## 6 Other                        220

Chapter 16

No need to do anything here.