Import your data

# Read the course data set from its CSV file into the `myData` data frame.
myData <- read.csv(
  file = "/Users/takeru/Desktop/PSU_DAT3000_IntroToDA/00_data/myData.csv"
)

Chapter 15

Create a factor

# Tabulate how many rows each country contributes.
count(myData, country)
##                     country  n
## 1                   Algeria  3
## 2                    Angola 22
## 3                     Benin 14
## 4                  Botswana 16
## 5              Burkina Faso 37
## 6                   Burundi  1
## 7                  Cameroon 96
## 8                Cape Verde  2
## 9  Central African Republic 17
## 10                     Chad 24
## 11                  Comoros  2
## 12                    Congo 85
## 13                 Djibouti  4
## 14                    Egypt  1
## 15        Equatorial Guinea  7
## 16                  Eritrea  5
## 17                 Eswatini  1
## 18                 Ethiopia 18
## 19                    Gabon  5
## 20                   Gambia  3
## 21                    Ghana 34
## 22                   Guinea 13
## 23              Ivory Coast 18
## 24                    Kenya 13
## 25                  Lesotho  4
## 26                  Liberia  1
## 27                    Libya  2
## 28               Madagascar  1
## 29                   Malawi  5
## 30                     Mali 23
## 31               Mauritania  4
## 32                Mauritius  2
## 33                  Morocco  4
## 34               Mozambique 13
## 35                  Namibia 24
## 36                    Niger 17
## 37                  Nigeria 73
## 38                   Rwanda  2
## 39                  Senegal  8
## 40               Seychelles  1
## 41             Sierra Leone  5
## 42                  Somalia  4
## 43             South Africa 25
## 44              South Sudan 23
## 45                    Sudan 40
## 46                 Tanzania  9
## 47                     Togo 13
## 48                  Tunisia  1
## 49                   Uganda  9
## 50                   Zambia 22
## 51                 Zimbabwe 20
# Valid levels for the `country` factor. These must be values that occur in
# the `country` column itself; passing column names here would match nothing
# and turn every country into NA.
rank_levels <- sort(unique(myData$country))

# Copy of the data with `country` stored as a factor over those levels.
data_rev <- myData %>%
  mutate(country = factor(country, levels = rank_levels))

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean number of native speakers per country. Both charts below are drawn
# from this single summary so that the only difference between them is the
# order of the bars.
# NOTE(review): assumes `native_speakers` is numeric -- confirm against the CSV.
speakers_by_country <- myData %>%
  group_by(country) %>%
  summarise(native_speakers = mean(native_speakers, na.rm = TRUE))

# Before ordering: countries are shown in their default order.
speakers_by_country %>%
  ggplot(aes(x = native_speakers, y = country)) +
  geom_col() +
  # Labeling
  labs(y = NULL, x = "Mean of Native speakers ")

# After ordering: countries are sorted by their mean number of native speakers.
speakers_by_country %>%
  ggplot(aes(x = native_speakers,
             y = fct_reorder(.f = country, .x = native_speakers))) +
  geom_col() +
  # Labeling
  labs(y = NULL, x = "Mean of Native speakers ")

Modify factor levels

Show examples of three functions:

  • fct_recode
# Store `family` as a factor and abbreviate two of its level names.
myData <- myData %>%
  mutate(
    family = fct_recode(
      as.factor(family),
      "NS" = "Nilo-Saharan",
      "AN" = "Austronesian"
    )
  )
  • fct_collapse
# Merge several language families into broader groups.
# The old level names on the right-hand side must match the current levels of
# `family` exactly. "Nilo-Saharan" and "Austronesian" were renamed to "NS" and
# "AN" by the fct_recode step above, so the abbreviated names are used here.
# NOTE(review): "IE", "AA" and "Khoe-Kawdi" were also reported as unknown
# levels; check their exact spelling with levels(myData$family) and correct
# them, otherwise fct_collapse will keep warning and leave those families
# untouched.
myData <- myData %>%
  mutate(
    family = fct_collapse(
      family,
      "IE_Only" = "IE",
      "Other" = c("AA", "AN", "NS", "Khoe-Kawdi")
    )
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `family = fct_collapse(...)`.
## Caused by warning:
## ! Unknown levels in `f`: IE, AA, Austronesian, Nilo-Saharan, Khoe-Kawdi
  • fct_lump
# Keep the two most frequent families and pool all remaining ones into
# "Other", then tabulate the result.
myData %>%
  mutate(family = as.factor(family)) %>%
  mutate(family = fct_lump(family, n = 2)) %>%
  count(family)
##        family   n
## 1 Niger–Congo 583
## 2          NS 108
## 3       Other 105

Chapter 16

No need to do anything here.