Import data
# Load the Excel workbook into a tibble
data <- read_excel(path = "My_Data.xlsx")
# Show the imported tibble
data
## # A tibble: 1,302 × 9
## Language Endonym `World Region` Country `Global Speakers` `Language Family`
## <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 Abakuá Abakuá Caribbean "Cuba" NA <NA>
## 2 Abaza Абаза Western Asia "Turke… 49800 Abkhaz-Adyge
## 3 Abruzzese… Abruzz… Southern Euro… "Italy" NA Indo-European
## 4 Abruzzese… Abruzz… Southern Euro… "Italy" NA Indo-European
## 5 Acehnese Bahsa … Southeastern … "Indon… 3500000 Austronesian
## 6 Acehnese Bahsa … Southeastern … "Indon… 3500000 Austronesian
## 7 Adjoukrou <NA> Western Africa "Ivory… 140000 Atlantic-Congo
## 8 Adyghe <NA> Western Asia "Turke… 117500 Abkhaz-Adyge
## 9 Afenmai Afenmai Western Africa "Niger… 270000 Atlantic-Congo
## 10 African-A… Black … Northern Amer… "Unite… 45109521 Indo-European
## # ℹ 1,292 more rows
## # ℹ 3 more variables: Location <chr>, Size <chr>, Status <chr>
Apply the following dplyr verbs to your data
# Walk through the core dplyr verbs on the language data and finish with the
# mean number of global speakers per country, largest mean first.
data %>%
  # Filter rows: keep only languages whose Status is "Historical"
  filter(Status == "Historical") %>%
  # Select columns
  select(
    Language, Size, `World Region`, Country, Status, `Global Speakers`
  ) %>%
  # Coerce the speaker counts to numeric so they can be averaged below
  mutate(`Global Speakers` = as.numeric(`Global Speakers`)) %>%
  # Arrange rows: descending by Size
  arrange(desc(Size)) %>%
  # Add columns: TRUE when the language's world region is Northern America
  mutate(North_America = `World Region` == "Northern America") %>%
  # Summarize by groups: mean speakers per Country, ignoring missing counts.
  # Only Country and the summarised column survive this step.
  group_by(Country) %>%
  summarise(`Global Speakers` = mean(`Global Speakers`, na.rm = TRUE)) %>%
  # Backticks are required here: a quoted 'Global Speakers' is a string
  # constant, which gives every row the same sort key and leaves the rows
  # unsorted.
  arrange(desc(`Global Speakers`))
## # A tibble: 50 × 2
## Country `Global Speakers`
## <chr> <dbl>
## 1 "Angola" 2100000
## 2 "Armenia,\r\nTurkey,\r\nLebanon" 3843000
## 3 "Austria,\r\nGermany" 14359000
## 4 "Bangladesh,\r\nIndia" 265042480
## 5 "Belgium" 3550000
## 6 "Canada" 4503271
## 7 "Canada,\r\nGreenland" 1200
## 8 "Croatia" 1400
## 9 "Cuba" 11340000
## 10 "D.R. Congo,\r\nCongo (Brazzaville)" 6500000
## # ℹ 40 more rows