Import your data
myData <- readxl::read_xlsx("../01_module4/data/myData.xlsx")
myData
## # A tibble: 1,104 × 31
## town11cd town11nm population_2011 size_flag rgn11nm coastal coastal_detailed
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 E34000007 Carlton… 5456 Small To… East M… Non-co… Smaller non-coa…
## 2 E34000016 Dorches… 19060 Small To… South … Non-co… Smaller non-coa…
## 3 E34000020 Ely BUA 19090 Small To… East o… Non-co… Smaller non-coa…
## 4 E34000026 Market … 6429 Small To… Yorksh… Non-co… Smaller non-coa…
## 5 E34000027 Downham… 10884 Small To… East o… Non-co… Smaller non-coa…
## 6 E34000039 Penrith… 15181 Small To… North … Non-co… Smaller non-coa…
## 7 E34000048 Bolsove… 11754 Small To… East M… Non-co… Smaller non-coa…
## 8 E34000055 March B… 21051 Medium T… East o… Non-co… Large non-coast…
## 9 E34000056 Southam… 6567 Small To… West M… Non-co… Smaller non-coa…
## 10 E34000067 Royston… 15781 Small To… East o… Non-co… Smaller non-coa…
## # ℹ 1,094 more rows
## # ℹ 24 more variables: ttwa11cd <chr>, ttwa11nm <chr>,
## # ttwa_classification <chr>, job_density_flag <chr>, income_flag <chr>,
## # university_flag <chr>, level4qual_residents35_64_2011 <chr>,
## # ks4_2012_2013_counts <dbl>,
## # key_stage_2_attainment_school_year_2007_to_2008 <dbl>,
## # key_stage_4_attainment_school_year_2012_to_2013 <dbl>, …
set.seed(1234)
data_small <- myData %>%
select(education_score, population_2011, size_flag) %>%
sample_n(5)
data_small
## # A tibble: 5 × 3
## education_score population_2011 size_flag
## <dbl> <dbl> <chr>
## 1 1.42 7500 Small Towns
## 2 1.66 37164 Medium Towns
## 3 -0.0323 23325 Medium Towns
## 4 -0.114 88483 Large Towns
## 5 -1.31 34260 Medium Towns
Separating and Uniting
Unite two columns
data_united <- data_small %>%
unite("town_info", size_flag, population_2011, sep = "-")
Separate a column
data_united %>%
separate(town_info, into = c("size_flag", "population_2011"), sep = "-")
## # A tibble: 5 × 3
## education_score size_flag population_2011
## <dbl> <chr> <chr>
## 1 1.42 Small Towns 7500
## 2 1.66 Medium Towns 37164
## 3 -0.0323 Medium Towns 23325
## 4 -0.114 Large Towns 88483
## 5 -1.31 Medium Towns 34260
Missing Values
data_wide %>%
pivot_longer(cols = c('Small Towns', 'Medium Towns', 'Large Towns'), names_to = "size_flag", values_to = "population_2011", values_drop_na = TRUE)
## # A tibble: 5 × 3
## education_score size_flag population_2011
## <dbl> <chr> <dbl>
## 1 1.42 Small Towns 7500
## 2 1.66 Medium Towns 37164
## 3 -0.0323 Medium Towns 23325
## 4 -0.114 Large Towns 88483
## 5 -1.31 Medium Towns 34260
data_small %>%
complete(education_score, size_flag)
## # A tibble: 15 × 3
## education_score size_flag population_2011
## <dbl> <chr> <dbl>
## 1 -1.31 Large Towns NA
## 2 -1.31 Medium Towns 34260
## 3 -1.31 Small Towns NA
## 4 -0.114 Large Towns 88483
## 5 -0.114 Medium Towns NA
## 6 -0.114 Small Towns NA
## 7 -0.0323 Large Towns NA
## 8 -0.0323 Medium Towns 23325
## 9 -0.0323 Small Towns NA
## 10 1.42 Large Towns NA
## 11 1.42 Medium Towns NA
## 12 1.42 Small Towns 7500
## 13 1.66 Large Towns NA
## 14 1.66 Medium Towns 37164
## 15 1.66 Small Towns NA
data_small %>%
complete(education_score, size_flag) %>%
drop_na(population_2011)
## # A tibble: 5 × 3
## education_score size_flag population_2011
## <dbl> <chr> <dbl>
## 1 -1.31 Medium Towns 34260
## 2 -0.114 Large Towns 88483
## 3 -0.0323 Medium Towns 23325
## 4 1.42 Small Towns 7500
## 5 1.66 Medium Towns 37164