# Import the Excel workbook; the path is relative to the working directory
data <- read_excel(path = "../01_module4/data/myData.xlsx")
# Show the imported tibble
data
## # A tibble: 193 × 9
## hdi_rank_2023 country human_development_in…¹ life_expectancy_at_b…²
## <dbl> <chr> <dbl> <dbl>
## 1 1 Iceland 0.972 82.7
## 2 2 Norway 0.97 83.3
## 3 2 Switzerland 0.97 84.0
## 4 4 Denmark 0.962 81.9
## 5 5 Germany 0.959 81.4
## 6 5 Sweden 0.959 83.3
## 7 7 Australia 0.958 83.9
## 8 8 Hong Kong, China… 0.955 85.5
## 9 8 Netherlands 0.955 82.2
## 10 10 Belgium 0.951 82.1
## # ℹ 183 more rows
## # ℹ abbreviated names: ¹human_development_index_hdi, ²life_expectancy_at_birth
## # ℹ 5 more variables: expected_years_of_schooling <dbl>,
## # mean_years_of_schooling <dbl>, gross_national_income_gni_per_capita <dbl>,
## # gni_per_capita_rank_minus_hdi_rank <dbl>, hdi_rank_2022 <chr>
# Add a `continent` column by translating each country name with countrycode()
data1 <- mutate(
  data,
  continent = countrycode(
    sourcevar = country,
    origin = "country.name",
    destination = "continent"
  )
)
data1
## # A tibble: 193 × 10
## hdi_rank_2023 country human_development_in…¹ life_expectancy_at_b…²
## <dbl> <chr> <dbl> <dbl>
## 1 1 Iceland 0.972 82.7
## 2 2 Norway 0.97 83.3
## 3 2 Switzerland 0.97 84.0
## 4 4 Denmark 0.962 81.9
## 5 5 Germany 0.959 81.4
## 6 5 Sweden 0.959 83.3
## 7 7 Australia 0.958 83.9
## 8 8 Hong Kong, China… 0.955 85.5
## 9 8 Netherlands 0.955 82.2
## 10 10 Belgium 0.951 82.1
## # ℹ 183 more rows
## # ℹ abbreviated names: ¹human_development_index_hdi, ²life_expectancy_at_birth
## # ℹ 6 more variables: expected_years_of_schooling <dbl>,
## # mean_years_of_schooling <dbl>, gross_national_income_gni_per_capita <dbl>,
## # gni_per_capita_rank_minus_hdi_rank <dbl>, hdi_rank_2022 <chr>,
## # continent <chr>
# Fix the RNG seed so the random draw below is reproducible
set.seed(1234)
# Keep three columns of data1, then randomly pick five of its rows
data_small <- sample_n(
  select(data1, country, continent, hdi_rank_2023),
  5
)
data_small
## # A tibble: 5 × 3
## country continent hdi_rank_2023
## <chr> <chr> <dbl>
## 1 Spain Europe 28
## 2 Grenada Americas 80
## 3 Myanmar Asia 150
## 4 Jordan Asia 100
## 5 Botswana Africa 111
# Widen: one column per continent, filled with the country's 2023 HDI rank
dataS_wide <- pivot_wider(
  data_small,
  names_from = continent,
  values_from = hdi_rank_2023
)
dataS_wide
## # A tibble: 5 × 5
## country Europe Americas Asia Africa
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Spain 28 NA NA NA
## 2 Grenada NA 80 NA NA
## 3 Myanmar NA NA 150 NA
## 4 Jordan NA NA 100 NA
## 5 Botswana NA NA NA 111
# Lengthen again: fold every column except `country` back into
# continent / hdi_rank_2023 pairs, dropping the NA cells created by widening
pivot_longer(
  dataS_wide,
  cols = -country,
  names_to = "continent",
  values_to = "hdi_rank_2023",
  values_drop_na = TRUE
)
## # A tibble: 5 × 3
## country continent hdi_rank_2023
## <chr> <chr> <dbl>
## 1 Spain Europe 28
## 2 Grenada Americas 80
## 3 Myanmar Asia 150
## 4 Jordan Asia 100
## 5 Botswana Africa 111
# Display the sampled tibble again; it is the input to unite() below
data_small
## # A tibble: 5 × 3
## country continent hdi_rank_2023
## <chr> <chr> <dbl>
## 1 Spain Europe 28
## 2 Grenada Americas 80
## 3 Myanmar Asia 150
## 4 Jordan Asia 100
## 5 Botswana Africa 111
# Merge `country` and `continent` into one `location` column, joined by ":"
dataS_unite <- unite(
  data_small,
  col = "location",
  country:continent,
  sep = ":"
)
dataS_unite
## # A tibble: 5 × 2
## location hdi_rank_2023
## <chr> <dbl>
## 1 Spain:Europe 28
## 2 Grenada:Americas 80
## 3 Myanmar:Asia 150
## 4 Jordan:Asia 100
## 5 Botswana:Africa 111
# Split `location` back into `country` and `continent`.
# `sep` is given explicitly as ":" (the delimiter used by unite() above).
# Without it, separate() splits on any run of non-alphanumeric characters,
# which would break country names that contain spaces, commas or
# parentheses.
dataS_unite %>%
  separate(location, into = c("country", "continent"), sep = ":")
## # A tibble: 5 × 3
## country continent hdi_rank_2023
## <chr> <chr> <dbl>
## 1 Spain Europe 28
## 2 Grenada Americas 80
## 3 Myanmar Asia 150
## 4 Jordan Asia 100
## 5 Botswana Africa 111
The data contain no missing values, so missing-value handling is not applicable.