Import your data

myData <- readxl::read_xlsx("../01_module4/data/myData.xlsx")
myData
## # A tibble: 1,104 × 31
##    town11cd  town11nm population_2011 size_flag rgn11nm coastal coastal_detailed
##    <chr>     <chr>              <dbl> <chr>     <chr>   <chr>   <chr>           
##  1 E34000007 Carlton…            5456 Small To… East M… Non-co… Smaller non-coa…
##  2 E34000016 Dorches…           19060 Small To… South … Non-co… Smaller non-coa…
##  3 E34000020 Ely BUA            19090 Small To… East o… Non-co… Smaller non-coa…
##  4 E34000026 Market …            6429 Small To… Yorksh… Non-co… Smaller non-coa…
##  5 E34000027 Downham…           10884 Small To… East o… Non-co… Smaller non-coa…
##  6 E34000039 Penrith…           15181 Small To… North … Non-co… Smaller non-coa…
##  7 E34000048 Bolsove…           11754 Small To… East M… Non-co… Smaller non-coa…
##  8 E34000055 March B…           21051 Medium T… East o… Non-co… Large non-coast…
##  9 E34000056 Southam…            6567 Small To… West M… Non-co… Smaller non-coa…
## 10 E34000067 Royston…           15781 Small To… East o… Non-co… Smaller non-coa…
## # ℹ 1,094 more rows
## # ℹ 24 more variables: ttwa11cd <chr>, ttwa11nm <chr>,
## #   ttwa_classification <chr>, job_density_flag <chr>, income_flag <chr>,
## #   university_flag <chr>, level4qual_residents35_64_2011 <chr>,
## #   ks4_2012_2013_counts <dbl>,
## #   key_stage_2_attainment_school_year_2007_to_2008 <dbl>,
## #   key_stage_4_attainment_school_year_2012_to_2013 <dbl>, …
set.seed(1234)

data_small <- myData %>%
    select(education_score, population_2011, size_flag) %>%
    sample_n(5)
data_small
## # A tibble: 5 × 3
##   education_score population_2011 size_flag   
##             <dbl>           <dbl> <chr>       
## 1          1.42              7500 Small Towns 
## 2          1.66             37164 Medium Towns
## 3         -0.0323           23325 Medium Towns
## 4         -0.114            88483 Large Towns 
## 5         -1.31             34260 Medium Towns

Pivoting

long to wide form

data_wide <- data_small %>%
    pivot_wider(names_from = size_flag, values_from = population_2011)

wide to long form

data_wide %>%
    pivot_longer(cols = c('Small Towns', 'Medium Towns', 'Large Towns'), names_to = "size_flag", values_to = "population_2011")
## # A tibble: 15 × 3
##    education_score size_flag    population_2011
##              <dbl> <chr>                  <dbl>
##  1          1.42   Small Towns             7500
##  2          1.42   Medium Towns              NA
##  3          1.42   Large Towns               NA
##  4          1.66   Small Towns               NA
##  5          1.66   Medium Towns           37164
##  6          1.66   Large Towns               NA
##  7         -0.0323 Small Towns               NA
##  8         -0.0323 Medium Towns           23325
##  9         -0.0323 Large Towns               NA
## 10         -0.114  Small Towns               NA
## 11         -0.114  Medium Towns              NA
## 12         -0.114  Large Towns            88483
## 13         -1.31   Small Towns               NA
## 14         -1.31   Medium Towns           34260
## 15         -1.31   Large Towns               NA

Separating and Uniting

Unite two columns

data_united <- data_small %>%
    unite("town_info", size_flag, population_2011, sep = "-")

Separate a column

data_united %>%
    separate(town_info, into = c("size_flag", "population_2011"), sep = "-")
## # A tibble: 5 × 3
##   education_score size_flag    population_2011
##             <dbl> <chr>        <chr>          
## 1          1.42   Small Towns  7500           
## 2          1.66   Medium Towns 37164          
## 3         -0.0323 Medium Towns 23325          
## 4         -0.114  Large Towns  88483          
## 5         -1.31   Medium Towns 34260

Missing Values

data_wide %>%
    pivot_longer(cols = c('Small Towns', 'Medium Towns', 'Large Towns'), names_to = "size_flag", values_to = "population_2011", values_drop_na = TRUE)
## # A tibble: 5 × 3
##   education_score size_flag    population_2011
##             <dbl> <chr>                  <dbl>
## 1          1.42   Small Towns             7500
## 2          1.66   Medium Towns           37164
## 3         -0.0323 Medium Towns           23325
## 4         -0.114  Large Towns            88483
## 5         -1.31   Medium Towns           34260
data_small %>%
    complete(education_score, size_flag)
## # A tibble: 15 × 3
##    education_score size_flag    population_2011
##              <dbl> <chr>                  <dbl>
##  1         -1.31   Large Towns               NA
##  2         -1.31   Medium Towns           34260
##  3         -1.31   Small Towns               NA
##  4         -0.114  Large Towns            88483
##  5         -0.114  Medium Towns              NA
##  6         -0.114  Small Towns               NA
##  7         -0.0323 Large Towns               NA
##  8         -0.0323 Medium Towns           23325
##  9         -0.0323 Small Towns               NA
## 10          1.42   Large Towns               NA
## 11          1.42   Medium Towns              NA
## 12          1.42   Small Towns             7500
## 13          1.66   Large Towns               NA
## 14          1.66   Medium Towns           37164
## 15          1.66   Small Towns               NA
data_small %>%
    complete(education_score, size_flag) %>%
    drop_na(population_2011)
## # A tibble: 5 × 3
##   education_score size_flag    population_2011
##             <dbl> <chr>                  <dbl>
## 1         -1.31   Medium Towns           34260
## 2         -0.114  Large Towns            88483
## 3         -0.0323 Medium Towns           23325
## 4          1.42   Small Towns             7500
## 5          1.66   Medium Towns           37164