# Load the workbook, then tally the number of rows in each region.
data <- read_excel(path = "myData.xlsx")
count(data, region)
## # A tibble: 7 × 2
## region n
## <chr> <int>
## 1 East Asia & Pacific 740
## 2 Europe & Central Asia 1160
## 3 Latin America & Caribbean 840
## 4 Middle East & North Africa 420
## 5 North America 60
## 6 South Asia 160
## 7 Sub-Saharan Africa 960
# Explicit level order for `region`. All seven regions found in the data are
# listed because factor() converts any value absent from `levels` to NA.
region_levels <- c(
  "Europe & Central Asia",
  "South Asia",
  "North America",
  "Latin America & Caribbean",
  "Middle East & North Africa",
  "Sub-Saharan Africa",
  "East Asia & Pacific"
)

# Copy of the data with `region` stored as a factor in the order above.
data_rev <- data %>%
  mutate(region = factor(region, levels = region_levels))
# Mean data_use_score per region, ignoring missing scores
data_use_by_region <- summarise(
  group_by(data, region),
  avg_data_use = mean(data_use_score, na.rm = TRUE)
)
data_use_by_region
## # A tibble: 7 × 2
## region avg_data_use
## <chr> <dbl>
## 1 East Asia & Pacific 45.2
## 2 Europe & Central Asia 59.8
## 3 Latin America & Caribbean 46.0
## 4 Middle East & North Africa 49.3
## 5 North America 54.8
## 6 South Asia 55.7
## 7 Sub-Saharan Africa 47.9
# Dot plot of the regional averages, with regions not reordered
ggplot(data_use_by_region, aes(x = avg_data_use, y = region)) +
  geom_point()
# Same dot plot, with regions sorted by their average score
ggplot(
  data_use_by_region,
  aes(x = avg_data_use, y = fct_reorder(region, avg_data_use))
) +
  geom_point() +
  labs(x = "Average Data Use Score", y = NULL)
Examples of three forcats functions: fct_recode(), fct_collapse(), and fct_lump():
# Rename two of the region levels; all other levels are left unchanged
data <- mutate(
  data,
  region_recode = fct_recode(
    region,
    "Europe" = "Europe & Central Asia",
    "Asia" = "East Asia & Pacific"
  )
)
# Merge the seven regions into four broader groups
data <- mutate(
  data,
  region_grouped = fct_collapse(
    region,
    "America" = c("North America", "Latin America & Caribbean"),
    "Europe" = "Europe & Central Asia",
    "Asia" = c("East Asia & Pacific", "South Asia"),
    "Others" = c("Sub-Saharan Africa", "Middle East & North Africa")
  )
)
# Lump the least frequent regions into "Other", then count rows per level
count(mutate(data, region = fct_lump(region)), region)
## # A tibble: 5 × 2
## region n
## <fct> <int>
## 1 East Asia & Pacific 740
## 2 Europe & Central Asia 1160
## 3 Latin America & Caribbean 840
## 4 Sub-Saharan Africa 960
## 5 Other 640
No need to do anything here.