Import your data

# Read the Excel workbook at the relative path below into `data`.
data <- read_excel(path = "../01_module4/data/myData.xlsx")

# Add a categorical variable: translate each country name into its continent.
data1 <- data %>%
  mutate(
    continent = countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "continent"
    )
  )


# List the unique continent values that were produced.
distinct(data1, continent)
## # A tibble: 5 × 1
##   continent
##   <chr>    
## 1 Europe   
## 2 Oceania  
## 3 Asia     
## 4 Americas 
## 5 Africa

Chapter 15

Create a factor

# Number of rows per continent while continent is still a character column.
count(data1, continent)
## # A tibble: 5 × 2
##   continent     n
##   <chr>     <int>
## 1 Africa       54
## 2 Americas     35
## 3 Asia         48
## 4 Europe       42
## 5 Oceania      14
# Explicit level order to use for the continent factor.
continent_levles <- c("Americas", "Europe", "Africa", "Asia", "Oceania")

# Convert continent from character to a factor with that level order.
data1_rev <- mutate(
  data1,
  continent = factor(continent, levels = continent_levles)
)

# Counts are now listed in level order instead of alphabetically.
count(data1_rev, continent)
## # A tibble: 5 × 2
##   continent     n
##   <fct>     <int>
## 1 Americas     35
## 2 Europe       42
## 3 Africa       54
## 4 Asia         48
## 5 Oceania      14

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean human development index per continent, ignoring missing values.
data_summary <- summarise(
  group_by(data1, continent),
  hdi = mean(human_development_index_hdi, na.rm = TRUE)
)

# Print the summary table.
data_summary
## # A tibble: 5 × 2
##   continent   hdi
##   <chr>     <dbl>
## 1 Africa    0.580
## 2 Americas  0.778
## 3 Asia      0.763
## 4 Europe    0.899
## 5 Oceania   0.718
# Bar chart before ordering: continents keep their default axis order.
# geom_col() draws one bar per row of data_summary, with bar length = hdi,
# which matches the "two bar charts" this section asks for.
ggplot(data_summary, aes(x = hdi, y = continent)) +
  geom_col()

# Bar chart after ordering: fct_reorder() sorts the continents by their
# mean HDI so the bars appear in increasing order of hdi.
ggplot(data_summary, aes(x = hdi, y = fct_reorder(continent, hdi))) +
  geom_col()

Modify factor levels

Show examples of three functions:

fct_recode

# fct_recode: rename the "Americas" level and leave the other levels as is,
# then count rows per (renamed) continent.
count(
  mutate(
    data1,
    continent = fct_recode(continent, "North & South America" = "Americas")
  ),
  continent
)
## # A tibble: 5 × 2
##   continent                 n
##   <fct>                 <int>
## 1 Africa                   54
## 2 North & South America    35
## 3 Asia                     48
## 4 Europe                   42
## 5 Oceania                  14

fct_collapse

# fct_collapse: merge the five continents into two groups, then count rows
# in each group.
data1 %>%
  mutate(
    continent = fct_collapse(
      continent,
      Developed_Country = c("Europe", "Americas", "Oceania"),
      Developing_Country = c("Asia", "Africa")
    )
  ) %>%
  count(continent)
## # A tibble: 2 × 2
##   continent              n
##   <fct>              <int>
## 1 Developing_Country   102
## 2 Developed_Country     91

fct_lump

# fct_lump: with default arguments, lump the least frequent continent(s)
# into a single "Other" level, then count rows per level.
count(
  mutate(data1, continent = fct_lump(continent)),
  continent
)
## # A tibble: 5 × 2
##   continent     n
##   <fct>     <int>
## 1 Africa       54
## 2 Americas     35
## 3 Asia         48
## 4 Europe       42
## 5 Other        14

Chapter 16

No need to do anything here.