Import your data
setwd("~/Desktop/PSU_DAT3000_IntroToDA/05_module8/data/")
data <- read_excel("My_Data.xlsx")
data
## # A tibble: 1,302 × 9
## Language Endonym `World Region` Country `Global Speakers` `Language Family`
## <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 Abakuá Abakuá Caribbean "Cuba" NA <NA>
## 2 Abaza Абаза Western Asia "Turke… 49800 Abkhaz-Adyge
## 3 Abruzzese… Abruzz… Southern Euro… "Italy" NA Indo-European
## 4 Abruzzese… Abruzz… Southern Euro… "Italy" NA Indo-European
## 5 Acehnese Bahsa … Southeastern … "Indon… 3500000 Austronesian
## 6 Acehnese Bahsa … Southeastern … "Indon… 3500000 Austronesian
## 7 Adjoukrou <NA> Western Africa "Ivory… 140000 Atlantic-Congo
## 8 Adyghe <NA> Western Asia "Turke… 117500 Abkhaz-Adyge
## 9 Afenmai Afenmai Western Africa "Niger… 270000 Atlantic-Congo
## 10 African-A… Black … Northern Amer… "Unite… 45109521 Indo-European
## # ℹ 1,292 more rows
## # ℹ 3 more variables: Location <chr>, Size <chr>, Status <chr>
data_long <- data %>%
select(`World Region`, Language) %>%
filter(`World Region` %in% c("Western Asia", "Southeastern Asia")) %>%
distinct()
data_long
## # A tibble: 108 × 2
## `World Region` Language
## <chr> <chr>
## 1 Western Asia Abaza
## 2 Southeastern Asia Acehnese
## 3 Western Asia Adyghe
## 4 Southeastern Asia Aklanon
## 5 Western Asia Assyrian Neo-Aramaic
## 6 Western Asia Avar
## 7 Western Asia Azeri
## 8 Southeastern Asia Balinese
## 9 Southeastern Asia Banguingui
## 10 Southeastern Asia Banjarese
## # ℹ 98 more rows
Separating and Uniting
Separate a column
table3_sep <- data_long %>%
separate(col = `World Region`, into = c("World Region", "Continent"))
table3_sep
## # A tibble: 108 × 3
## `World Region` Continent Language
## <chr> <chr> <chr>
## 1 Western Asia Abaza
## 2 Southeastern Asia Acehnese
## 3 Western Asia Adyghe
## 4 Southeastern Asia Aklanon
## 5 Western Asia Assyrian Neo-Aramaic
## 6 Western Asia Avar
## 7 Western Asia Azeri
## 8 Southeastern Asia Balinese
## 9 Southeastern Asia Banguingui
## 10 Southeastern Asia Banjarese
## # ℹ 98 more rows
Unite two columns
table3_sep %>%
unite(col = `World Region`, c("World Region", "Continent"), sep = "/", )
## # A tibble: 108 × 2
## `World Region` Language
## <chr> <chr>
## 1 Western/Asia Abaza
## 2 Southeastern/Asia Acehnese
## 3 Western/Asia Adyghe
## 4 Southeastern/Asia Aklanon
## 5 Western/Asia Assyrian Neo-Aramaic
## 6 Western/Asia Avar
## 7 Western/Asia Azeri
## 8 Southeastern/Asia Balinese
## 9 Southeastern/Asia Banguingui
## 10 Southeastern/Asia Banjarese
## # ℹ 98 more rows
Missing Values
data <- tibble(
Language = c("Abaza", "Abaza", "Abaza", "Abaza","Abakua", "Abakua", "Abakua"),
`World Region` = c(1, 2, 3, 4, 2, 3, 4),
`Global Speakers` = c(1.88, 0.59, 0.35, NA, 0.92, 0.17, 2.66)
)
data %>%
pivot_wider(names_from = Language,
values_from = `Global Speakers`)
## # A tibble: 4 × 3
## `World Region` Abaza Abakua
## <dbl> <dbl> <dbl>
## 1 1 1.88 NA
## 2 2 0.59 0.92
## 3 3 0.35 0.17
## 4 4 NA 2.66