Import your data

# Read the Excel workbook (path is relative to this document) into `data`,
# then print it to inspect the columns and first rows.
data <- read_excel(path = "../01_module4/data/myData.xlsx")
data

## # A tibble: 193 × 9
##    hdi_rank_2023 country           human_development_in…¹ life_expectancy_at_b…²
##            <dbl> <chr>                              <dbl>                  <dbl>
##  1             1 Iceland                            0.972                   82.7
##  2             2 Norway                             0.97                    83.3
##  3             2 Switzerland                        0.97                    84.0
##  4             4 Denmark                            0.962                   81.9
##  5             5 Germany                            0.959                   81.4
##  6             5 Sweden                             0.959                   83.3
##  7             7 Australia                          0.958                   83.9
##  8             8 Hong Kong, China…                  0.955                   85.5
##  9             8 Netherlands                        0.955                   82.2
## 10            10 Belgium                            0.951                   82.1
## # ℹ 183 more rows
## # ℹ abbreviated names: ¹human_development_index_hdi, ²life_expectancy_at_birth
## # ℹ 5 more variables: expected_years_of_schooling <dbl>,
## #   mean_years_of_schooling <dbl>, gross_national_income_gni_per_capita <dbl>,
## #   gni_per_capita_rank_minus_hdi_rank <dbl>, hdi_rank_2022 <chr>

# Adding a variable

# Add a `continent` column by looking up each value of `country`
# (matched as a country name) with countrycode().
data1 <- data %>%
    mutate(
        continent = countrycode(
            sourcevar = country,
            origin = "country.name",
            destination = "continent"
        )
    )

data1

## # A tibble: 193 × 10
##    hdi_rank_2023 country           human_development_in…¹ life_expectancy_at_b…²
##            <dbl> <chr>                              <dbl>                  <dbl>
##  1             1 Iceland                            0.972                   82.7
##  2             2 Norway                             0.97                    83.3
##  3             2 Switzerland                        0.97                    84.0
##  4             4 Denmark                            0.962                   81.9
##  5             5 Germany                            0.959                   81.4
##  6             5 Sweden                             0.959                   83.3
##  7             7 Australia                          0.958                   83.9
##  8             8 Hong Kong, China…                  0.955                   85.5
##  9             8 Netherlands                        0.955                   82.2
## 10            10 Belgium                            0.951                   82.1
## # ℹ 183 more rows
## # ℹ abbreviated names: ¹human_development_index_hdi, ²life_expectancy_at_birth
## # ℹ 6 more variables: expected_years_of_schooling <dbl>,
## #   mean_years_of_schooling <dbl>, gross_national_income_gni_per_capita <dbl>,
## #   gni_per_capita_rank_minus_hdi_rank <dbl>, hdi_rank_2022 <chr>,
## #   continent <chr>

Making the data small

# Fix the RNG seed so the random sample below is reproducible.
set.seed(1234)

# Keep three columns, then draw five rows at random.
data_small <- data1 %>%
    select(country, continent, hdi_rank_2023) %>%
    sample_n(size = 5)

data_small

## # A tibble: 5 × 3
##   country  continent hdi_rank_2023
##   <chr>    <chr>             <dbl>
## 1 Spain    Europe               28
## 2 Grenada  Americas             80
## 3 Myanmar  Asia                150
## 4 Jordan   Asia                100
## 5 Botswana Africa              111

Pivoting

Long to wide form

# Long -> wide: each distinct `continent` value becomes its own column,
# filled with that country's `hdi_rank_2023` (NA where a country does not
# belong to the continent).
dataS_wide <- pivot_wider(
    data_small,
    names_from = continent,
    values_from = hdi_rank_2023
)

dataS_wide

## # A tibble: 5 × 5
##   country  Europe Americas  Asia Africa
##   <chr>     <dbl>    <dbl> <dbl>  <dbl>
## 1 Spain        28       NA    NA     NA
## 2 Grenada      NA       80    NA     NA
## 3 Myanmar      NA       NA   150     NA
## 4 Jordan       NA       NA   100     NA
## 5 Botswana     NA       NA    NA    111

Wide to long form

# Wide -> long: stack every column except `country` back into
# `continent` / `hdi_rank_2023` pairs. Dropping NA values removes the
# empty cells that the widening step introduced.
pivot_longer(
    dataS_wide,
    cols = -country,
    names_to = "continent",
    values_to = "hdi_rank_2023",
    values_drop_na = TRUE
)

## # A tibble: 5 × 3
##   country  continent hdi_rank_2023
##   <chr>    <chr>             <dbl>
## 1 Spain    Europe               28
## 2 Grenada  Americas             80
## 3 Myanmar  Asia                150
## 4 Jordan   Asia                100
## 5 Botswana Africa              111

Separating and Uniting

# Print the sampled data again as the starting point for the
# unite/separate examples below.
data_small

## # A tibble: 5 × 3
##   country  continent hdi_rank_2023
##   <chr>    <chr>             <dbl>
## 1 Spain    Europe               28
## 2 Grenada  Americas             80
## 3 Myanmar  Asia                150
## 4 Jordan   Asia                100
## 5 Botswana Africa              111

Unite two columns

# Paste `country` and `continent` together into a single `location`
# column, using ":" as the delimiter. The two source columns are replaced
# by the new one.
dataS_unite <- unite(
    data_small,
    col = "location",
    country:continent,
    sep = ":"
)

dataS_unite

## # A tibble: 5 × 2
##   location         hdi_rank_2023
##   <chr>                    <dbl>
## 1 Spain:Europe                28
## 2 Grenada:Americas            80
## 3 Myanmar:Asia               150
## 4 Jordan:Asia                100
## 5 Botswana:Africa            111

Separate a column

# Split `location` back into `country` and `continent`.
#
# `sep = ":"` is given explicitly so that this step is the exact inverse of
# the unite() call that built `location`. Without it, separate() splits on
# any run of non-alphanumeric characters, so a country name containing a
# space, comma, or parenthesis (e.g. "Hong Kong, China (SAR)") would be cut
# in the wrong place and produce incorrect columns.
dataS_unite %>%
    separate(
        col = location,
        into = c("country", "continent"),
        sep = ":"
    )

## # A tibble: 5 × 3
##   country  continent hdi_rank_2023
##   <chr>    <chr>             <dbl>
## 1 Spain    Europe               28
## 2 Grenada  Americas             80
## 3 Myanmar  Asia                150
## 4 Jordan   Asia                100
## 5 Botswana Africa              111

Missing Values

Not applicable: the data have no missing values.

Week 8: Apply it to your data 7

Marlene Sophie Krohn

2026-11-03

Import your data

Making the data small

Pivoting

Long to wide form

Wide to long form

Separating and Uniting

Unite two columns

Separate a column

Missing Values