Import data

# Read the Excel workbook into a tibble
data <- read_excel(path = "My_Data.xlsx")
# Show the imported data
data

## # A tibble: 1,302 × 9
##    Language   Endonym `World Region` Country `Global Speakers` `Language Family`
##    <chr>      <chr>   <chr>          <chr>               <dbl> <chr>            
##  1 Abakuá     Abakuá  Caribbean      "Cuba"                 NA <NA>             
##  2 Abaza      Абаза   Western Asia   "Turke…             49800 Abkhaz-Adyge     
##  3 Abruzzese… Abruzz… Southern Euro… "Italy"                NA Indo-European    
##  4 Abruzzese… Abruzz… Southern Euro… "Italy"                NA Indo-European    
##  5 Acehnese   Bahsa … Southeastern … "Indon…           3500000 Austronesian     
##  6 Acehnese   Bahsa … Southeastern … "Indon…           3500000 Austronesian     
##  7 Adjoukrou  <NA>    Western Africa "Ivory…            140000 Atlantic-Congo   
##  8 Adyghe     <NA>    Western Asia   "Turke…            117500 Abkhaz-Adyge     
##  9 Afenmai    Afenmai Western Africa "Niger…            270000 Atlantic-Congo   
## 10 African-A… Black … Northern Amer… "Unite…          45109521 Indo-European    
## # ℹ 1,292 more rows
## # ℹ 3 more variables: Location <chr>, Size <chr>, Status <chr>

Apply the following dplyr verbs to your data

data %>%

  # Filter rows: keep only the languages whose Status is "Historical"
  filter(Status == "Historical") %>%

  # Select columns
  select(Language, Size, `World Region`, Country, Status, `Global Speakers`) %>%

  # Arrange rows
  # (as.numeric() is a safeguard; the column already prints as <dbl> on import)
  mutate(`Global Speakers` = as.numeric(`Global Speakers`)) %>%
  arrange(desc(Size)) %>%

  # Add columns: TRUE where the World Region is "Northern America"
  mutate(North_America = `World Region` == "Northern America") %>%

  # Summarize by groups: mean number of speakers per Country, ignoring NAs.
  # NOTE: summarise() returns only Country and the summary column, so the
  # Size ordering and the North_America flag above do not appear in the result.
  group_by(Country) %>%
  summarise(`Global Speakers` = mean(`Global Speakers`, na.rm = TRUE)) %>%

  # Sort countries from most to fewest mean speakers.
  # The column name must be in backticks: a quoted 'Global Speakers' is a
  # string constant, which gives arrange() nothing to sort by.
  arrange(desc(`Global Speakers`))

## # A tibble: 50 × 2
##    Country                              `Global Speakers`
##    <chr>                                            <dbl>
##  1 "Angola"                                       2100000
##  2 "Armenia,\r\nTurkey,\r\nLebanon"               3843000
##  3 "Austria,\r\nGermany"                         14359000
##  4 "Bangladesh,\r\nIndia"                       265042480
##  5 "Belgium"                                      3550000
##  6 "Canada"                                       4503271
##  7 "Canada,\r\nGreenland"                            1200
##  8 "Croatia"                                         1400
##  9 "Cuba"                                        11340000
## 10 "D.R. Congo,\r\nCongo (Brazzaville)"           6500000
## # ℹ 40 more rows

Module 6: Apply 5

Alex Lenfest

Import data

Apply the following dplyr verbs to your data