Goal is to predict accreditation: which museums are likely to be accredited.

Import Data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(correlationfunnel)
## ══ correlationfunnel Tip #1 ════════════════════════════════════════════════════
## Make sure your data is not overly imbalanced prior to using `correlate()`.
## If less than 5% imbalance, consider sampling. :)
library(dplyr)

# Download the TidyTuesday (2022-11-22) museums dataset directly from GitHub.
data <- readr::read_csv(
    file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv'
)
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore data

# Print a per-column summary of the raw data (type, missingness, basic
# statistics) to spot cleaning issues before modelling.
skimr::skim(data)
Data summary
Name data
Number of rows 4191
Number of columns 35
_______________________
Column type frequency:
character 24
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
museum_id 0 1.00 8 15 0 4191 0
Name_of_museum 0 1.00 3 76 0 4190 0
Address_line_1 441 0.89 3 61 0 3212 0
Address_line_2 2816 0.33 3 39 0 1167 0
Village,_Town_or_City 4 1.00 3 24 0 1696 0
Postcode 0 1.00 6 9 0 3918 0
Admin_area 0 1.00 12 137 0 393 0
Accreditation 0 1.00 10 12 0 2 0
Governance 0 1.00 7 41 0 13 0
Size 0 1.00 4 7 0 5 0
Size_provenance 179 0.96 2 29 0 16 0
Subject_Matter 0 1.00 5 45 0 114 0
Year_opened 0 1.00 9 9 0 351 0
Year_closed 0 1.00 9 9 0 170 0
DOMUS_Subject_Matter 2788 0.33 5 27 0 21 0
Primary_provenance_of_data 0 1.00 3 8 0 18 0
Identifier_used_in_primary_data_source 2056 0.51 2 8 0 2134 0
Area_Geodemographic_group 49 0.99 11 40 0 17 0
Area_Geodemographic_group_code 49 0.99 3 3 0 16 0
Area_Geodemographic_subgroup 49 0.99 12 39 0 25 0
Area_Geodemographic_subgroup_code 49 0.99 4 4 0 24 0
Area_Geodemographic_supergroup 49 0.99 16 39 0 8 0
Area_Geodemographic_supergroup_code 49 0.99 2 2 0 8 0
Notes 2980 0.29 12 751 0 956 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Latitude 0 1.00 52.93 2.09 49.18 51.48 52.47 53.96 100.00 ▇▁▁▁▁
Longitude 0 1.00 -1.96 1.84 -8.09 -3.10 -1.87 -0.48 1.76 ▁▂▇▇▅
DOMUS_identifier 2347 0.44 1303.45 1597.19 1.00 486.50 991.50 1470.25 7746.00 ▇▂▁▁▁
Area_Deprivation_index 49 0.99 5.44 2.48 1.00 4.00 5.00 7.00 10.00 ▃▆▇▆▃
Area_Deprivation_index_crime 49 0.99 5.43 3.07 1.00 3.00 6.00 8.00 10.00 ▇▆▅▇▇
Area_Deprivation_index_education 49 0.99 6.04 2.61 1.00 4.00 6.00 8.00 10.00 ▃▅▇▇▆
Area_Deprivation_index_employment 49 0.99 6.08 2.76 1.00 4.00 6.00 8.00 10.00 ▅▆▇▇▇
Area_Deprivation_index_health 49 0.99 6.02 2.82 1.00 4.00 6.00 8.00 10.00 ▅▆▆▇▇
Area_Deprivation_index_housing 49 0.99 3.97 2.75 1.00 1.00 3.00 6.00 10.00 ▇▅▃▂▂
Area_Deprivation_index_income 49 0.99 5.99 2.62 1.00 4.00 6.00 8.00 10.00 ▃▆▇▇▆
Area_Deprivation_index_services 49 0.99 4.78 3.01 1.00 2.00 4.00 7.00 10.00 ▇▅▅▅▅

Issues with data

* Missing values
* Factors or numeric variables
    * Governance, Size, Subject_Matter, Area_Geodemographic_group, Area_Geodemographic_group_code, Area_Deprivation_index, Area_Deprivation_index_crime, Area_Deprivation_index_education, Area_Deprivation_index_employment, Area_Deprivation_index_health, Area_Deprivation_index_housing, Area_Deprivation_index_income, Area_Deprivation_index_services
* Zero variance variables
* Character variables: convert them to numbers in a recipe step
* Unbalanced target variable: Accreditation
* ID variable: museum_id

# Treat missing values and clean data.

# Drop columns that are not used in the analysis, then keep only rows with
# no missing value in any remaining column.
# NOTE(review): Address_line_1 and Address_line_2 are retained and have many
# missing values, so na.omit() removes a large share of the rows -- confirm
# this is intended.
data_clean <- data %>%
    select(
        -Size_provenance,
        -DOMUS_Subject_Matter,
        -Year_closed,
        -Primary_provenance_of_data,
        -Identifier_used_in_primary_data_source,
        -Area_Geodemographic_subgroup,
        -Area_Geodemographic_subgroup_code,
        -Area_Geodemographic_supergroup,
        -Area_Geodemographic_supergroup_code,
        -Notes,
        -Latitude,
        -Longitude,
        -DOMUS_identifier
    ) %>%
    na.omit()

# Inspect the structure of the cleaned data.
data_clean %>% glimpse()
## Rows: 1,358
## Columns: 22
## $ museum_id                         <chr> "mm.domus.WM019", "mm.aim.0485", "mm…
## $ Name_of_museum                    <chr> "Warwickshire Museum Of Rural Life",…
## $ Address_line_1                    <chr> "Warwick College of Agriculture", "5…
## $ Address_line_2                    <chr> "Horticulture & Equine Studies", "Bi…
## $ `Village,_Town_or_City`           <chr> "Moreton Morrell", "Cheltenham", "Lo…
## $ Postcode                          <chr> "CV35 9BL", "GL52 8TA", "NW1 7NB", "…
## $ Admin_area                        <chr> "/England/West Midlands (English Reg…
## $ Accreditation                     <chr> "Unaccredited", "Accredited", "Unacc…
## $ Governance                        <chr> "Government-Local_Authority", "Indep…
## $ Size                              <chr> "medium", "medium", "small", "small"…
## $ Subject_Matter                    <chr> "Rural_Industry-Farming", "War_and_c…
## $ Year_opened                       <chr> "1984:1984", "2013:2013", "1996:1996…
## $ Area_Deprivation_index            <dbl> 8, 8, 2, 6, 7, 6, 9, 1, 2, 6, 4, 4, …
## $ Area_Deprivation_index_crime      <dbl> 9, 10, 1, 10, 7, 7, 8, 3, 2, 9, 6, 4…
## $ Area_Deprivation_index_education  <dbl> 8, 7, 6, 6, 7, 4, 8, 1, 2, 10, 3, 6,…
## $ Area_Deprivation_index_employment <dbl> 10, 7, 3, 7, 7, 5, 9, 1, 2, 10, 3, 4…
## $ Area_Deprivation_index_health     <dbl> 8, 8, 2, 7, 9, 6, 9, 1, 3, 6, 5, 2, …
## $ Area_Deprivation_index_housing    <dbl> 5, 7, 1, 7, 7, 10, 8, 2, 5, 1, 10, 1…
## $ Area_Deprivation_index_income     <dbl> 8, 8, 3, 5, 8, 5, 9, 1, 1, 10, 3, 5,…
## $ Area_Deprivation_index_services   <dbl> 1, 4, 4, 1, 1, 8, 4, 4, 1, 2, 9, 10,…
## $ Area_Geodemographic_group         <chr> "English and Welsh Countryside", "Co…
## $ Area_Geodemographic_group_code    <chr> "3ar", "7ar", "5ar", "3cr", "3cr", "…
# Names of the columns to treat as categorical. The Area_Deprivation_index*
# columns hold scores from 1 to 10 that were imported as numeric.
factors_vec <- c(
    "Governance",
    "Size",
    "Subject_Matter",
    "Area_Geodemographic_group",
    "Area_Geodemographic_group_code",
    "Area_Deprivation_index",
    "Area_Deprivation_index_crime",
    "Area_Deprivation_index_education",
    "Area_Deprivation_index_employment",
    "Area_Deprivation_index_health",
    "Area_Deprivation_index_housing",
    "Area_Deprivation_index_income",
    "Area_Deprivation_index_services"
)

# Convert every listed column to a factor.
data_clean <- mutate(data_clean, across(all_of(factors_vec), as.factor))

Explore data

Accredited vs. Unaccredited

# Tabulate the number of museums in each level of the target variable.
count(data_clean, Accreditation)
## # A tibble: 2 × 2
##   Accreditation     n
##   <chr>         <int>
## 1 Accredited      686
## 2 Unaccredited    672
# Bar chart of museum counts by accreditation status.
ggplot(data_clean, aes(x = Accreditation)) +
    geom_bar()

Most common Subjects

# Count museums per subject, most frequent first.
data_clean %>%
    count(Subject_Matter, sort = TRUE)
## # A tibble: 103 × 2
##    Subject_Matter                    n
##    <fct>                         <int>
##  1 Local_Histories                 348
##  2 Arts-Fine_and_decorative_arts    69
##  3 War_and_conflict-Regiment        69
##  4 Buildings-Houses-Large_houses    46
##  5 Transport-Trains_and_railways    46
##  6 Mixed-Other                      35
##  7 Mixed-Encyclopaedic              33
##  8 War_and_conflict-Airforce        30
##  9 Personality-Literary             22
## 10 Transport-Cars_and_motorbikes    22
## # ℹ 93 more rows
# Bar chart of museum counts for every subject category.
ggplot(data = data_clean, mapping = aes(x = Subject_Matter)) +
    geom_bar()

Years museums opened

# Count plot of opening year against accreditation status; point size shows
# how many museums share each combination.
ggplot(data_clean, aes(x = Year_opened, y = Accreditation)) +
    geom_count()

Correlation plot

# Make sure no incomplete rows reach the binarization step.
data_clean <- data_clean %>% na.omit()

# Step 1: binarize
# The museum_id identifier is removed first so it is not encoded as a feature.
data_binarized <- binarize(select(data_clean, -museum_id))

# Inspect the resulting binary columns.
glimpse(data_binarized)
## Rows: 1,358
## Columns: 211
## $ Name_of_museum__100th_Bomb_Group_Memorial_Museum                                                                          <dbl> …
## $ `Name_of_museum__-OTHER`                                                                                                  <dbl> …
## $ Address_line_1__Town_Hall                                                                                                 <dbl> …
## $ `Address_line_1__-OTHER`                                                                                                  <dbl> …
## $ Address_line_2__High_Street                                                                                               <dbl> …
## $ Address_line_2__Market_Place                                                                                              <dbl> …
## $ `Address_line_2__-OTHER`                                                                                                  <dbl> …
## $ `Village,_Town_or_City__Birmingham`                                                                                       <dbl> …
## $ `Village,_Town_or_City__Bristol`                                                                                          <dbl> …
## $ `Village,_Town_or_City__Edinburgh`                                                                                        <dbl> …
## $ `Village,_Town_or_City__Glasgow`                                                                                          <dbl> …
## $ `Village,_Town_or_City__London`                                                                                           <dbl> …
## $ `Village,_Town_or_City__Manchester`                                                                                       <dbl> …
## $ `Village,_Town_or_City__-OTHER`                                                                                           <dbl> …
## $ Postcode__SO23_8TS                                                                                                        <dbl> …
## $ `Postcode__-OTHER`                                                                                                        <dbl> …
## $ `Admin_area__/England/London_(English_Region)/Camden_(London_Borough)`                                                    <dbl> …
## $ `Admin_area__/England/London_(English_Region)/Westminster_(London_Borough)`                                               <dbl> …
## $ `Admin_area__/England/South_East_(English_Region)/Hampshire_(English_County)/Winchester_(English_District_or_Borough)`    <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Cornwall_(English_UA)`                                                  <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Wiltshire_(English_UA)`                                                 <dbl> …
## $ `Admin_area__/England/West_Midlands_(English_Region)/West_Midlands_(English_CA)/Birmingham_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/Scotland/City_of_Edinburgh_(Scottish_Council_Area)`                                                         <dbl> …
## $ `Admin_area__/Scotland/Fife_(Scottish_Council_Area)`                                                                      <dbl> …
## $ `Admin_area__/Scotland/Highland_(Scottish_Council_Area)`                                                                  <dbl> …
## $ `Admin_area__-OTHER`                                                                                                      <dbl> …
## $ Accreditation__Accredited                                                                                                 <dbl> …
## $ Accreditation__Unaccredited                                                                                               <dbl> …
## $ `Governance__Government-Local_Authority`                                                                                  <dbl> …
## $ `Governance__Government-National`                                                                                         <dbl> …
## $ `Governance__Independent-National_Trust`                                                                                  <dbl> …
## $ `Governance__Independent-Not_for_profit`                                                                                  <dbl> …
## $ `Governance__Independent-Private`                                                                                         <dbl> …
## $ `Governance__Independent-Unknown`                                                                                         <dbl> …
## $ Governance__University                                                                                                    <dbl> …
## $ Governance__Unknown                                                                                                       <dbl> …
## $ `Governance__-OTHER`                                                                                                      <dbl> …
## $ Size__large                                                                                                               <dbl> …
## $ Size__medium                                                                                                              <dbl> …
## $ Size__small                                                                                                               <dbl> …
## $ Size__unknown                                                                                                             <dbl> …
## $ `Size__-OTHER`                                                                                                            <dbl> …
## $ `Subject_Matter__Arts-Fine_and_decorative_arts`                                                                           <dbl> …
## $ `Subject_Matter__Buildings-Houses-Large_houses`                                                                           <dbl> …
## $ `Subject_Matter__Buildings-Houses-Medium_houses`                                                                          <dbl> …
## $ `Subject_Matter__Industry_and_manufacture-Mining_and_quarrying`                                                           <dbl> …
## $ Subject_Matter__Local_Histories                                                                                           <dbl> …
## $ `Subject_Matter__Mixed-Encyclopaedic`                                                                                     <dbl> …
## $ `Subject_Matter__Mixed-Other`                                                                                             <dbl> …
## $ Subject_Matter__Other                                                                                                     <dbl> …
## $ `Subject_Matter__Personality-Literary`                                                                                    <dbl> …
## $ `Subject_Matter__Rural_Industry-Farming`                                                                                  <dbl> …
## $ `Subject_Matter__Sea_and_seafaring-Mixed`                                                                                 <dbl> …
## $ `Subject_Matter__Transport-Cars_and_motorbikes`                                                                           <dbl> …
## $ `Subject_Matter__Transport-Trains_and_railways`                                                                           <dbl> …
## $ `Subject_Matter__War_and_conflict-Airforce`                                                                               <dbl> …
## $ `Subject_Matter__War_and_conflict-Event_or_site`                                                                          <dbl> …
## $ `Subject_Matter__War_and_conflict-Regiment`                                                                               <dbl> …
## $ `Subject_Matter__-OTHER`                                                                                                  <dbl> …
## $ `Year_opened__1951:1951`                                                                                                  <dbl> …
## $ `Year_opened__1960:2017`                                                                                                  <dbl> …
## $ `Year_opened__1968:1968`                                                                                                  <dbl> …
## $ `Year_opened__1969:1969`                                                                                                  <dbl> …
## $ `Year_opened__1972:1972`                                                                                                  <dbl> …
## $ `Year_opened__1973:1973`                                                                                                  <dbl> …
## $ `Year_opened__1974:1974`                                                                                                  <dbl> …
## $ `Year_opened__1975:1975`                                                                                                  <dbl> …
## $ `Year_opened__1976:1976`                                                                                                  <dbl> …
## $ `Year_opened__1977:1977`                                                                                                  <dbl> …
## $ `Year_opened__1978:1978`                                                                                                  <dbl> …
## $ `Year_opened__1979:1979`                                                                                                  <dbl> …
## $ `Year_opened__1980:1980`                                                                                                  <dbl> …
## $ `Year_opened__1981:1981`                                                                                                  <dbl> …
## $ `Year_opened__1982:1982`                                                                                                  <dbl> …
## $ `Year_opened__1983:1983`                                                                                                  <dbl> …
## $ `Year_opened__1984:1984`                                                                                                  <dbl> …
## $ `Year_opened__1985:1985`                                                                                                  <dbl> …
## $ `Year_opened__1986:1986`                                                                                                  <dbl> …
## $ `Year_opened__1987:1987`                                                                                                  <dbl> …
## $ `Year_opened__1988:1988`                                                                                                  <dbl> …
## $ `Year_opened__1989:1989`                                                                                                  <dbl> …
## $ `Year_opened__1990:1990`                                                                                                  <dbl> …
## $ `Year_opened__1991:1991`                                                                                                  <dbl> …
## $ `Year_opened__1992:1992`                                                                                                  <dbl> …
## $ `Year_opened__1993:1993`                                                                                                  <dbl> …
## $ `Year_opened__1994:1994`                                                                                                  <dbl> …
## $ `Year_opened__1995:1995`                                                                                                  <dbl> …
## $ `Year_opened__1996:1996`                                                                                                  <dbl> …
## $ `Year_opened__1997:1997`                                                                                                  <dbl> …
## $ `Year_opened__1998:1998`                                                                                                  <dbl> …
## $ `Year_opened__1999:1999`                                                                                                  <dbl> …
## $ `Year_opened__2002:2002`                                                                                                  <dbl> …
## $ `Year_opened__2005:2005`                                                                                                  <dbl> …
## $ `Year_opened__2008:2008`                                                                                                  <dbl> …
## $ `Year_opened__2012:2012`                                                                                                  <dbl> …
## $ `Year_opened__2013:2013`                                                                                                  <dbl> …
## $ `Year_opened__2014:2014`                                                                                                  <dbl> …
## $ `Year_opened__-OTHER`                                                                                                     <dbl> …
## $ Area_Deprivation_index__1                                                                                                 <dbl> …
## $ Area_Deprivation_index__2                                                                                                 <dbl> …
## $ Area_Deprivation_index__3                                                                                                 <dbl> …
## $ Area_Deprivation_index__4                                                                                                 <dbl> …
## $ Area_Deprivation_index__5                                                                                                 <dbl> …
## $ Area_Deprivation_index__6                                                                                                 <dbl> …
## $ Area_Deprivation_index__7                                                                                                 <dbl> …
## $ Area_Deprivation_index__8                                                                                                 <dbl> …
## $ Area_Deprivation_index__9                                                                                                 <dbl> …
## $ Area_Deprivation_index__10                                                                                                <dbl> …
## $ Area_Deprivation_index_crime__1                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__2                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__3                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__4                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__5                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__6                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__7                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__8                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__9                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__10                                                                                          <dbl> …
## $ Area_Deprivation_index_education__1                                                                                       <dbl> …
## $ Area_Deprivation_index_education__2                                                                                       <dbl> …
## $ Area_Deprivation_index_education__3                                                                                       <dbl> …
## $ Area_Deprivation_index_education__4                                                                                       <dbl> …
## $ Area_Deprivation_index_education__5                                                                                       <dbl> …
## $ Area_Deprivation_index_education__6                                                                                       <dbl> …
## $ Area_Deprivation_index_education__7                                                                                       <dbl> …
## $ Area_Deprivation_index_education__8                                                                                       <dbl> …
## $ Area_Deprivation_index_education__9                                                                                       <dbl> …
## $ Area_Deprivation_index_education__10                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__1                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__2                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__3                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__4                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__5                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__6                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__7                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__8                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__9                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__10                                                                                     <dbl> …
## $ Area_Deprivation_index_health__1                                                                                          <dbl> …
## $ Area_Deprivation_index_health__2                                                                                          <dbl> …
## $ Area_Deprivation_index_health__3                                                                                          <dbl> …
## $ Area_Deprivation_index_health__4                                                                                          <dbl> …
## $ Area_Deprivation_index_health__5                                                                                          <dbl> …
## $ Area_Deprivation_index_health__6                                                                                          <dbl> …
## $ Area_Deprivation_index_health__7                                                                                          <dbl> …
## $ Area_Deprivation_index_health__8                                                                                          <dbl> …
## $ Area_Deprivation_index_health__9                                                                                          <dbl> …
## $ Area_Deprivation_index_health__10                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__1                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__2                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__3                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__4                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__5                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__6                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__7                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__8                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__9                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__10                                                                                        <dbl> …
## $ Area_Deprivation_index_income__1                                                                                          <dbl> …
## $ Area_Deprivation_index_income__2                                                                                          <dbl> …
## $ Area_Deprivation_index_income__3                                                                                          <dbl> …
## $ Area_Deprivation_index_income__4                                                                                          <dbl> …
## $ Area_Deprivation_index_income__5                                                                                          <dbl> …
## $ Area_Deprivation_index_income__6                                                                                          <dbl> …
## $ Area_Deprivation_index_income__7                                                                                          <dbl> …
## $ Area_Deprivation_index_income__8                                                                                          <dbl> …
## $ Area_Deprivation_index_income__9                                                                                          <dbl> …
## $ Area_Deprivation_index_income__10                                                                                         <dbl> …
## $ Area_Deprivation_index_services__1                                                                                        <dbl> …
## $ Area_Deprivation_index_services__2                                                                                        <dbl> …
## $ Area_Deprivation_index_services__3                                                                                        <dbl> …
## $ Area_Deprivation_index_services__4                                                                                        <dbl> …
## $ Area_Deprivation_index_services__5                                                                                        <dbl> …
## $ Area_Deprivation_index_services__6                                                                                        <dbl> …
## $ Area_Deprivation_index_services__7                                                                                        <dbl> …
## $ Area_Deprivation_index_services__8                                                                                        <dbl> …
## $ Area_Deprivation_index_services__9                                                                                        <dbl> …
## $ Area_Deprivation_index_services__10                                                                                       <dbl> …
## $ Area_Geodemographic_group__Country_Living                                                                                 <dbl> …
## $ Area_Geodemographic_group__English_and_Welsh_Countryside                                                                  <dbl> …
## $ Area_Geodemographic_group__Ethnically_Diverse_Metropolitan_Living                                                         <dbl> …
## $ Area_Geodemographic_group__Larger_Towns_and_Cities                                                                        <dbl> …
## $ Area_Geodemographic_group__London_Cosmopolitan                                                                            <dbl> …
## $ Area_Geodemographic_group__Manufacturing_Traits                                                                           <dbl> …
## $ Area_Geodemographic_group__Northern_Ireland_Countryside                                                                   <dbl> …
## $ Area_Geodemographic_group__Remoter_Coastal_Living                                                                         <dbl> …
## $ `Area_Geodemographic_group__Rural-Urban_Fringe`                                                                           <dbl> …
## $ Area_Geodemographic_group__Scottish_Countryside                                                                           <dbl> …
## $ Area_Geodemographic_group__Scottish_Industrial_Heritage                                                                   <dbl> …
## $ Area_Geodemographic_group__Services_Manufacturing_and_Mining_Legacy                                                       <dbl> …
## $ Area_Geodemographic_group__Suburban_Traits                                                                                <dbl> …
## $ Area_Geodemographic_group__Thriving_Rural                                                                                 <dbl> …
## $ Area_Geodemographic_group__Town_Living                                                                                    <dbl> …
## $ Area_Geodemographic_group__University_Towns_and_Cities                                                                    <dbl> …
## $ `Area_Geodemographic_group__-OTHER`                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__1ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__1br                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__2ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__2br                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__3ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__3br                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__3cr                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__4ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__5ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__6ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__6br                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__7ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__7br                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__7cr                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__8ar                                                                                       <dbl> …
## $ Area_Geodemographic_group_code__8br                                                                                       <dbl> …
# Step 2: correlate
# Correlate every binarized column with the Accreditation__Accredited target.
data_correlation <- correlationfunnel::correlate(
    data_binarized,
    target = Accreditation__Accredited
)

data_correlation
## # A tibble: 211 × 3
##    feature        bin                        correlation
##    <fct>          <chr>                            <dbl>
##  1 Accreditation  Accredited                       1    
##  2 Accreditation  Unaccredited                    -1    
##  3 Governance     Independent-Private             -0.300
##  4 Size           small                           -0.243
##  5 Governance     Independent-Unknown             -0.222
##  6 Size           medium                           0.211
##  7 Size           large                            0.187
##  8 Size           unknown                         -0.177
##  9 Governance     Government-Local_Authority       0.162
## 10 Subject_Matter -OTHER                          -0.149
## # ℹ 201 more rows
# Step 3: Plot
# Draw the correlation funnel from the correlations computed in step 2.
correlationfunnel::plot_correlation_funnel(data_correlation)
## Warning: ggrepel: 191 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Model Building

Split Data

library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom        1.0.5      ✔ rsample      1.2.1 
## ✔ dials        1.2.1      ✔ tune         1.2.1 
## ✔ infer        1.0.7      ✔ workflows    1.1.4 
## ✔ modeldata    1.4.0      ✔ workflowsets 1.1.0 
## ✔ parsnip      1.2.1      ✔ yardstick    1.3.1 
## ✔ recipes      1.0.10
## Warning: package 'modeldata' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
# Fix the RNG seed so the subsample, the split, and the folds are reproducible.
set.seed(1234)
# Continue with a random sample of 100 rows.
data_clean <- dplyr::sample_n(data_clean, 100)

# Train/test split, stratified on the outcome.
data_split <- rsample::initial_split(data_clean, strata = Accreditation)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)

# Stratified cross-validation folds built from the training set only.
data_cv <- rsample::vfold_cv(data_train, strata = Accreditation)
data_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [67/8]> Fold01
##  2 <split [67/8]> Fold02
##  3 <split [67/8]> Fold03
##  4 <split [67/8]> Fold04
##  5 <split [67/8]> Fold05
##  6 <split [67/8]> Fold06
##  7 <split [68/7]> Fold07
##  8 <split [68/7]> Fold08
##  9 <split [68/7]> Fold09
## 10 <split [69/6]> Fold10

# library(usemodels)
# use_xgboost(Accreditation ~ ., data = data_train)

Preprocess data

# Profile data_clean (column types, missing values, number of unique values)
# before writing the preprocessing recipe.
skimr::skim(data_clean)
Data summary
Name data_clean
Number of rows 100
Number of columns 22
_______________________
Column type frequency:
character 9
factor 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
museum_id 0 1 9 14 0 100 0
Name_of_museum 0 1 6 60 0 100 0
Address_line_1 0 1 5 44 0 99 0
Address_line_2 0 1 4 27 0 93 0
Village,_Town_or_City 0 1 3 18 0 84 0
Postcode 0 1 6 8 0 99 0
Admin_area 0 1 23 125 0 77 0
Accreditation 0 1 10 12 0 2 0
Year_opened 0 1 9 9 0 60 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Governance 0 1 FALSE 6 Ind: 41, Gov: 34, Ind: 9, Uni: 8
Size 0 1 FALSE 4 sma: 55, med: 27, lar: 16, unk: 2
Subject_Matter 0 1 FALSE 45 Loc: 21, Art: 8, War: 6, Mix: 5
Area_Deprivation_index 0 1 FALSE 10 5: 15, 6: 15, 2: 13, 10: 11
Area_Deprivation_index_crime 0 1 FALSE 10 2: 14, 1: 13, 4: 13, 8: 12
Area_Deprivation_index_education 0 1 FALSE 10 7: 14, 3: 13, 6: 12, 2: 11
Area_Deprivation_index_employment 0 1 FALSE 10 7: 19, 10: 14, 4: 12, 2: 11
Area_Deprivation_index_health 0 1 FALSE 10 9: 15, 5: 13, 8: 12, 2: 11
Area_Deprivation_index_housing 0 1 FALSE 10 1: 22, 2: 13, 5: 13, 4: 11
Area_Deprivation_index_income 0 1 FALSE 10 7: 14, 6: 13, 2: 11, 5: 11
Area_Deprivation_index_services 0 1 FALSE 10 1: 17, 8: 15, 9: 13, 5: 11
Area_Geodemographic_group 0 1 FALSE 15 Cou: 16, Eng: 12, Sco: 12, Lar: 10
Area_Geodemographic_group_code 0 1 FALSE 15 7ar: 16, 3ar: 12, 3cr: 12, 2ar: 10
library(themis)
library(recipes)
library(textrecipes)

# Preprocessing recipe: Accreditation is the outcome and every other column of
# data_train starts out as a predictor.
# NOTE(review): only museum_id is taken out of the predictor set, so the
# remaining text columns (Name_of_museum, Address_line_1, ...) are dummy
# encoded by step_dummy(); the glimpse output reports 677 columns for 75 rows.
# Consider giving those near-unique columns a non-predictor role as well.
xgboost_recipe <- recipes::recipe(Accreditation ~ ., data = data_train) %>%
    # Keep the identifier in the data but do not use it as a predictor.
    recipes::update_role(museum_id, new_role = "ID") %>%
    # Convert Admin_area into term-frequency columns, keeping at most 50 tokens.
    textrecipes::step_tokenize(Admin_area) %>%
    textrecipes::step_tokenfilter(Admin_area, max_tokens = 50) %>%
    textrecipes::step_tf(Admin_area) %>%
    # Dummy-encode whatever nominal predictors are left.
    recipes::step_dummy(all_nominal_predictors())

# Estimate the recipe on the training data and inspect the processed columns.
xgboost_recipe %>%
    recipes::prep() %>%
    recipes::juice() %>%
    dplyr::glimpse()
## Rows: 75
## Columns: 677
## $ museum_id                                                                   <fct> …
## $ Accreditation                                                               <fct> …
## $ tf_Admin_area_and                                                           <int> …
## $ tf_Admin_area_area                                                          <int> …
## $ tf_Admin_area_borough                                                       <int> …
## $ tf_Admin_area_ca                                                            <int> …
## $ tf_Admin_area_city                                                          <int> …
## $ tf_Admin_area_cornwall                                                      <int> …
## $ tf_Admin_area_council                                                       <int> …
## $ tf_Admin_area_county                                                        <int> …
## $ tf_Admin_area_devon                                                         <int> …
## $ tf_Admin_area_district                                                      <int> …
## $ tf_Admin_area_doncaster                                                     <int> …
## $ tf_Admin_area_dorset                                                        <int> …
## $ tf_Admin_area_east                                                          <int> …
## $ tf_Admin_area_edinburgh                                                     <int> …
## $ tf_Admin_area_eileanan                                                      <int> …
## $ tf_Admin_area_england                                                       <int> …
## $ tf_Admin_area_english                                                       <int> …
## $ tf_Admin_area_essex                                                         <int> …
## $ tf_Admin_area_guildford                                                     <int> …
## $ tf_Admin_area_h                                                             <int> …
## $ tf_Admin_area_hampshire                                                     <int> …
## $ tf_Admin_area_highland                                                      <int> …
## $ tf_Admin_area_holland                                                       <int> …
## $ tf_Admin_area_humber                                                        <int> …
## $ tf_Admin_area_lincolnshire                                                  <int> …
## $ tf_Admin_area_liverpool                                                     <int> …
## $ tf_Admin_area_london                                                        <int> …
## $ tf_Admin_area_maldon                                                        <int> …
## $ tf_Admin_area_midlands                                                      <int> …
## $ tf_Admin_area_na                                                            <int> …
## $ tf_Admin_area_norfolk                                                       <int> …
## $ tf_Admin_area_north                                                         <int> …
## $ tf_Admin_area_northamptonshire                                              <int> …
## $ tf_Admin_area_of                                                            <int> …
## $ tf_Admin_area_or                                                            <int> …
## $ tf_Admin_area_region                                                        <int> …
## $ tf_Admin_area_scotland                                                      <int> …
## $ tf_Admin_area_scottish                                                      <int> …
## $ tf_Admin_area_sheffield                                                     <int> …
## $ tf_Admin_area_siar                                                          <int> …
## $ tf_Admin_area_south                                                         <int> …
## $ tf_Admin_area_suffolk                                                       <int> …
## $ tf_Admin_area_surrey                                                        <int> …
## $ tf_Admin_area_the                                                           <int> …
## $ tf_Admin_area_ua                                                            <int> …
## $ tf_Admin_area_wales                                                         <int> …
## $ tf_Admin_area_welsh                                                         <int> …
## $ tf_Admin_area_west                                                          <int> …
## $ tf_Admin_area_wiltshire                                                     <int> …
## $ tf_Admin_area_yorkshire                                                     <int> …
## $ Name_of_museum_Alexander.Fleming.Laboratory.Museum                          <dbl> …
## $ Name_of_museum_Amberley.Museum...Heritage.Centre                            <dbl> …
## $ Name_of_museum_Arbroath.Art.Gallery                                         <dbl> …
## $ Name_of_museum_Ash.Museum                                                   <dbl> …
## $ Name_of_museum_Ashworth.Barracks.Museum                                     <dbl> …
## $ Name_of_museum_Atwell.Wilson.Motor.Museum                                   <dbl> …
## $ Name_of_museum_Bexley.Museum                                                <dbl> …
## $ Name_of_museum_Bilston.Craft.Gallery                                        <dbl> …
## $ Name_of_museum_Bishops.House                                                <dbl> …
## $ Name_of_museum_Centre.for.Research.Collections.and.Art.Collection           <dbl> …
## $ Name_of_museum_Chatham.Historic.Dockyard                                    <dbl> …
## $ Name_of_museum_Chiltern.Open.Air.Museum                                     <dbl> …
## $ Name_of_museum_Clan.Macalister.Charitable.Trust                             <dbl> …
## $ Name_of_museum_Cornwall.Aviation.Heritage.Centre                            <dbl> …
## $ Name_of_museum_Daventry.Museum                                              <dbl> …
## $ Name_of_museum_Dean.Heritage.Museum                                         <dbl> …
## $ Name_of_museum_Devil.s.Porridge.Museum                                      <dbl> …
## $ Name_of_museum_Dingwall.Museum                                              <dbl> …
## $ Name_of_museum_Gearannon.Blackhouse.Village                                 <dbl> …
## $ Name_of_museum_Gravesham.Museum                                             <dbl> …
## $ Name_of_museum_Hall.Ith.Wood.Museum                                         <dbl> …
## $ Name_of_museum_Haverhill...District.Local.History.Centre                    <dbl> …
## $ Name_of_museum_Horsepower..The.Museum.Of.The.Kings.Royal.Hussars            <dbl> …
## $ Name_of_museum_Jarrow.Hall...Anglo.Saxon.Farm..Village.And.Bede.Museum      <dbl> …
## $ Name_of_museum_Keep.Military.Museum                                         <dbl> …
## $ Name_of_museum_Ken.Hawley.Collection                                        <dbl> …
## $ Name_of_museum_Kings.Own.Yorkshire.Light.Infantry.Museum                    <dbl> …
## $ Name_of_museum_Langton.Matravers.Museum                                     <dbl> …
## $ Name_of_museum_Launceston.Steam.Railway.Museum                              <dbl> …
## $ Name_of_museum_Maccrimmon.Piping.Heritage.Centre                            <dbl> …
## $ Name_of_museum_Maeldune.Heritage.Centre                                     <dbl> …
## $ Name_of_museum_Maldon.District.Museum                                       <dbl> …
## $ Name_of_museum_Margrove..The.Heritage.Centre.For.South.Cleveland            <dbl> …
## $ Name_of_museum_Merseyside.Museum.of.Labour.History                          <dbl> …
## $ Name_of_museum_Military.Museum..Kings.Lynn.                                 <dbl> …
## $ Name_of_museum_Minera.Lead.Mines                                            <dbl> …
## $ Name_of_museum_Mrs.Smiths.Cottage                                           <dbl> …
## $ Name_of_museum_Museum.of.British.Surfing                                    <dbl> …
## $ Name_of_museum_Museum.Of.Domestic.Design.And.Architecture                   <dbl> …
## $ Name_of_museum_Museum.Of.Military.Medicine                                  <dbl> …
## $ Name_of_museum_National.Badminton.Museum                                    <dbl> …
## $ Name_of_museum_Ness.Heritage.Centre                                         <dbl> …
## $ Name_of_museum_New.Mills.Heritage.And.Information.Centre                    <dbl> …
## $ Name_of_museum_New.Walk.Museum.And.Art.Gallery                              <dbl> …
## $ Name_of_museum_North.East.Bus.Museum                                        <dbl> …
## $ Name_of_museum_Nottingham.Industrial.Museum                                 <dbl> …
## $ Name_of_museum_Oundle.Museum                                                <dbl> …
## $ Name_of_museum_Pinchbeck.Engine.Museum                                      <dbl> …
## $ Name_of_museum_Planet.Earth.Museum                                          <dbl> …
## $ Name_of_museum_Porthmadog.Maritime.Museum                                   <dbl> …
## $ Name_of_museum_Robert.Owen.Memorial.Museum                                  <dbl> …
## $ Name_of_museum_Rowleys.House.Museum                                         <dbl> …
## $ Name_of_museum_Royal.Electrical...Mechanical.Engineers.Museum.Of.Technology <dbl> …
## $ Name_of_museum_Ruddington.Village.Museum                                    <dbl> …
## $ Name_of_museum_Shepherd.Wheel                                               <dbl> …
## $ Name_of_museum_Staffordshire.County.Museum                                  <dbl> …
## $ Name_of_museum_The.History.On.Wheels.Museum                                 <dbl> …
## $ Name_of_museum_The.John.Buchan.Story                                        <dbl> …
## $ Name_of_museum_The.Peoples.Story                                            <dbl> …
## $ Name_of_museum_The.Rifles.Museum                                            <dbl> …
## $ Name_of_museum_The.Spalding.Bulb.Museum                                     <dbl> …
## $ Name_of_museum_Timothy.Hackworth.Victorian.Railway.Museum                   <dbl> …
## $ Name_of_museum_Tolbooth.Museum                                              <dbl> …
## $ Name_of_museum_Tom.Mathias.Museum                                           <dbl> …
## $ Name_of_museum_Trowbridge.Museum                                            <dbl> …
## $ Name_of_museum_University.Of.Aberdeen..Zoology.Department.Museum            <dbl> …
## $ Name_of_museum_University.Of.Essex.Collection.Of.Latin.American.Art         <dbl> …
## $ Name_of_museum_University.Of.Hull..Art.Collection                           <dbl> …
## $ Name_of_museum_University.Of.St.Andrews..Bell.Pettigrew.Museum              <dbl> …
## $ Name_of_museum_Victoria.Tower                                               <dbl> …
## $ Name_of_museum_Welsh.Quilt.Centre                                           <dbl> …
## $ Name_of_museum_Winterbourne.House.And.Garden                                <dbl> …
## $ Name_of_museum_Wymondham.Heritage.Museum                                    <dbl> …
## $ Name_of_museum_Yorkshire.Cricket.Museum                                     <dbl> …
## $ Address_line_1_X58.Edgbaston.Park.Road                                      <dbl> …
## $ Address_line_1_X5a.Gearrannan                                               <dbl> …
## $ Address_line_1_Arbroath.Library                                             <dbl> …
## $ Address_line_1_Ash.Centre                                                   <dbl> …
## $ Address_line_1_Ashworth.Barracks                                            <dbl> …
## $ Address_line_1_Birchgrove.Garden.Centre                                     <dbl> …
## $ Address_line_1_Bute.Medical.Building                                        <dbl> …
## $ Address_line_1_c.o.Gravesend.Library                                        <dbl> …
## $ Address_line_1_Camp.Mill                                                    <dbl> …
## $ Address_line_1_Canongate.Tolbooth                                           <dbl> …
## $ Address_line_1_Castle.Hill                                                  <dbl> …
## $ Address_line_1_County.Sessions.House                                        <dbl> …
## $ Address_line_1_Craven.Cottage                                               <dbl> …
## $ Address_line_1_Doncaster.Museum...Art.Gallery                               <dbl> …
## $ Address_line_1_Downside                                                     <dbl> …
## $ Address_line_1_Glenbarr.Abbey                                               <dbl> …
## $ Address_line_1_Greenway                                                     <dbl> …
## $ Address_line_1_Ground.Floor                                                 <dbl> …
## $ Address_line_1_Habost                                                       <dbl> …
## $ Address_line_1_Hall.Place                                                   <dbl> …
## $ Address_line_1_HAS.3                                                        <dbl> …
## $ Address_line_1_Headingly.Stadium                                            <dbl> …
## $ Address_line_1_Houghton.Bridge                                              <dbl> …
## $ Address_line_1_Jarrow.Hall                                                  <dbl> …
## $ Address_line_1_Kelham.Island.Museum                                         <dbl> …
## $ Address_line_1_Keogh.Barracks                                               <dbl> …
## $ Address_line_1_Launceston.Steam.Railway                                     <dbl> …
## $ Address_line_1_Leam.Lane                                                    <dbl> …
## $ Address_line_1_Longclose.House                                              <dbl> …
## $ Address_line_1_Main.University.Library                                      <dbl> …
## $ Address_line_1_Margrove.Park                                                <dbl> …
## $ Address_line_1_Meersbrook.Park                                              <dbl> …
## $ Address_line_1_MoDA.Collections.Centre                                      <dbl> …
## $ Address_line_1_Moot.Hall                                                    <dbl> …
## $ Address_line_1_Mount.Pleasant                                               <dbl> …
## $ Address_line_1_Museum...Art.Gallery                                         <dbl> …
## $ Address_line_1_National.Badminton.Centre                                    <dbl> …
## $ Address_line_1_Newland.Park                                                 <dbl> …
## $ Address_line_1_Oakley.Wharf.No.1                                            <dbl> …
## $ Address_line_1_Old.Pier                                                     <dbl> …
## $ Address_line_1_Old.Schoolhouse                                              <dbl> …
## $ Address_line_1_Paradise.Park                                                <dbl> …
## $ Address_line_1_Peninsula.Barracks                                           <dbl> …
## $ Address_line_1_Prince.Philip.Barrcaks                                       <dbl> …
## $ Address_line_1_Rock.Mill.Lane                                               <dbl> …
## $ Address_line_1_Rowleys.House.Museum                                         <dbl> …
## $ Address_line_1_Sail.and.Colour.Loft                                         <dbl> …
## $ Address_line_1_Shugborough.Estate                                           <dbl> …
## $ Address_line_1_Soho.Cottages                                                <dbl> …
## $ Address_line_1_St.Georges.Close                                             <dbl> …
## $ Address_line_1_St.Peters.Rooms                                              <dbl> …
## $ Address_line_1_St..Margarets.School.House                                   <dbl> …
## $ Address_line_1_St..Marys.Hospital                                           <dbl> …
## $ Address_line_1_Stanfield                                                    <dbl> …
## $ Address_line_1_Telephone.box                                                <dbl> …
## $ Address_line_1_The.Chambers.Institution                                     <dbl> …
## $ Address_line_1_The.Court.House                                              <dbl> …
## $ Address_line_1_The.Courtyard                                                <dbl> …
## $ Address_line_1_The.Cross                                                    <dbl> …
## $ Address_line_1_The.Keep                                                     <dbl> …
## $ Address_line_1_The.Moot.Hall                                                <dbl> …
## $ Address_line_1_The.Museum.In.The.Park                                       <dbl> …
## $ Address_line_1_The.Shires                                                   <dbl> …
## $ Address_line_1_The.Town.Hall                                                <dbl> …
## $ Address_line_1_The.University                                               <dbl> …
## $ Address_line_1_The.Yard                                                     <dbl> …
## $ Address_line_1_Town.Hall.Arts.Centre                                        <dbl> …
## $ Address_line_1_Town.House                                                   <dbl> …
## $ Address_line_1_University.of.Essex                                          <dbl> …
## $ Address_line_1_Welland.and.Deepings.Internal.Drainage.Board                 <dbl> …
## $ Address_line_1_Wern.Road                                                    <dbl> …
## $ Address_line_1_Whiteley.Woods                                               <dbl> …
## $ Address_line_1_Zoology.Department                                           <dbl> …
## $ Address_line_2_X3.East.Road                                                 <dbl> …
## $ Address_line_2_X47.Mill.Road                                                <dbl> …
## $ Address_line_2_X53.NEW.WALK                                                 <dbl> …
## $ Address_line_2_Aberdeen.University                                          <dbl> …
## $ Address_line_2_Aerohub.2                                                    <dbl> …
## $ Address_line_2_Alma.Street                                                  <dbl> …
## $ Address_line_2_Almondbury                                                   <dbl> …
## $ Address_line_2_Amberley                                                     <dbl> …
## $ Address_line_2_Annan.Road                                                   <dbl> …
## $ Address_line_2_Ash.Hill.Road                                                <dbl> …
## $ Address_line_2_Ash.Vale                                                     <dbl> …
## $ Address_line_2_Avis.Road                                                    <dbl> …
## $ Address_line_2_Barker.Street                                                <dbl> …
## $ Address_line_2_Bilston                                                      <dbl> …
## $ Address_line_2_Boosbeck                                                     <dbl> …
## $ Address_line_2_Borreraig                                                    <dbl> …
## $ Address_line_2_Bourne.Road                                                  <dbl> …
## $ Address_line_2_Bridport                                                     <dbl> …
## $ Address_line_2_Broad.Street                                                 <dbl> …
## $ Address_line_2_Caen.Street                                                  <dbl> …
## $ Address_line_2_Carloway                                                     <dbl> …
## $ Address_line_2_Cedar.Road                                                   <dbl> …
## $ Address_line_2_Chequer.Road                                                 <dbl> …
## $ Address_line_2_Church.Bank                                                  <dbl> …
## $ Address_line_2_Church.Lane                                                  <dbl> …
## $ Address_line_2_Church.Street                                                <dbl> …
## $ Address_line_2_Common.Road                                                  <dbl> …
## $ Address_line_2_Cottingham.Road                                              <dbl> …
## $ Address_line_2_Court.Street                                                 <dbl> …
## $ Address_line_2_Deeping.House                                                <dbl> …
## $ Address_line_2_Edgbaston                                                    <dbl> …
## $ Address_line_2_Garnon.s.Hill.Road                                           <dbl> …
## $ Address_line_2_George.Square                                                <dbl> …
## $ Address_line_2_Glenbarr                                                     <dbl> …
## $ Address_line_2_Gorelands.Lane                                               <dbl> …
## $ Address_line_2_Hackworth.Close                                              <dbl> …
## $ Address_line_2_High.Street                                                  <dbl> …
## $ Address_line_2_Hill.Terrace                                                 <dbl> …
## $ Address_line_2_Langton.Matravers                                            <dbl> …
## $ Address_line_2_Loughton.Lodge                                               <dbl> …
## $ Address_line_2_Lyneham                                                      <dbl> …
## $ Address_line_2_Market.Cross.Place                                           <dbl> …
## $ Address_line_2_Market.Square                                                <dbl> …
## $ Address_line_2_Middlesex.University                                         <dbl> …
## $ Address_line_2_Milford                                                      <dbl> …
## $ Address_line_2_Mill.Road                                                    <dbl> …
## $ Address_line_2_Millfleet                                                    <dbl> …
## $ Address_line_2_Minera                                                       <dbl> …
## $ Address_line_2_Ness                                                         <dbl> …
## $ Address_line_2_New.Mills                                                    <dbl> …
## $ Address_line_2_Norton.Lees.Lane                                             <dbl> …
## $ Address_line_2_Norwich.Road                                                 <dbl> …
## $ Address_line_2_off.Crompton.Way                                             <dbl> …
## $ Address_line_2_off.Hangingwater.Road                                        <dbl> …
## $ Address_line_2_Plume.Building                                               <dbl> …
## $ Address_line_2_Praed.Street                                                 <dbl> …
## $ Address_line_2_Romsey.Road                                                  <dbl> …
## $ Address_line_2_Soudley                                                      <dbl> …
## $ Address_line_2_St..Michaels.Lane                                            <dbl> …
## $ Address_line_2_St..Thomas.Road                                              <dbl> …
## $ Address_line_2_Stockley.Lane                                                <dbl> …
## $ Address_line_2_Sunfleet.Road                                                <dbl> …
## $ Address_line_2_The.Harbour                                                  <dbl> …
## $ Address_line_2_University.of.St.Andrews                                     <dbl> …
## $ Address_line_2_Wardley                                                      <dbl> …
## $ Address_line_2_William.Brown.Street                                         <dbl> …
## $ Address_line_2_Windmill.Street                                              <dbl> …
## $ Address_line_2_Wivenhoe.Park                                                <dbl> …
## $ Address_line_2_Wollaton.Hall.And.Deer.Park                                  <dbl> …
## $ `Village,_Town_or_City_Aldeburgh`                                           <dbl> …
## $ `Village,_Town_or_City_Aldershot`                                           <dbl> …
## $ `Village,_Town_or_City_Arbroath`                                            <dbl> …
## $ `Village,_Town_or_City_Arundel`                                             <dbl> …
## $ `Village,_Town_or_City_Ash`                                                 <dbl> …
## $ `Village,_Town_or_City_Bexley`                                              <dbl> …
## $ `Village,_Town_or_City_Birmingham`                                          <dbl> …
## $ `Village,_Town_or_City_Bolton`                                              <dbl> …
## $ `Village,_Town_or_City_Braunton`                                            <dbl> …
## $ `Village,_Town_or_City_By.Dunvegan`                                         <dbl> …
## $ `Village,_Town_or_City_By.Tarbert`                                          <dbl> …
## $ `Village,_Town_or_City_Calne`                                               <dbl> …
## $ `Village,_Town_or_City_Chalfont.St.Giles`                                   <dbl> …
## $ `Village,_Town_or_City_Chatham`                                             <dbl> …
## $ `Village,_Town_or_City_Chippenham`                                          <dbl> …
## $ `Village,_Town_or_City_Cilgerran`                                           <dbl> …
## $ `Village,_Town_or_City_Cinderford`                                          <dbl> …
## $ `Village,_Town_or_City_Colchester`                                          <dbl> …
## $ `Village,_Town_or_City_Daventry`                                            <dbl> …
## $ `Village,_Town_or_City_Dingwall`                                            <dbl> …
## $ `Village,_Town_or_City_Doncaster`                                           <dbl> …
## $ `Village,_Town_or_City_Dorchester`                                          <dbl> …
## $ `Village,_Town_or_City_East.Riggs`                                          <dbl> …
## $ `Village,_Town_or_City_Edinburgh`                                           <dbl> …
## $ `Village,_Town_or_City_Gateshead`                                           <dbl> …
## $ `Village,_Town_or_City_Gravesend`                                           <dbl> …
## $ `Village,_Town_or_City_Haverhill`                                           <dbl> …
## $ `Village,_Town_or_City_High.Peak`                                           <dbl> …
## $ `Village,_Town_or_City_Huddersfield`                                        <dbl> …
## $ `Village,_Town_or_City_Isle.of.Lewis`                                       <dbl> …
## $ `Village,_Town_or_City_Jarrow`                                              <dbl> …
## $ `Village,_Town_or_City_Kings.Lynn`                                          <dbl> …
## $ `Village,_Town_or_City_Kingston.upon.Hull`                                  <dbl> …
## $ `Village,_Town_or_City_Lampeter`                                            <dbl> …
## $ `Village,_Town_or_City_Launceston`                                          <dbl> …
## $ `Village,_Town_or_City_Leeds`                                               <dbl> …
## $ `Village,_Town_or_City_Leicester`                                           <dbl> …
## $ `Village,_Town_or_City_Liverpool`                                           <dbl> …
## $ `Village,_Town_or_City_London`                                              <dbl> …
## $ `Village,_Town_or_City_Maldon`                                              <dbl> …
## $ `Village,_Town_or_City_Milton.Keynes`                                       <dbl> …
## $ `Village,_Town_or_City_Navenby`                                             <dbl> …
## $ `Village,_Town_or_City_Newhaven`                                            <dbl> …
## $ `Village,_Town_or_City_Newquay`                                             <dbl> …
## $ `Village,_Town_or_City_Newtown`                                             <dbl> …
## $ `Village,_Town_or_City_Nottingham`                                          <dbl> …
## $ `Village,_Town_or_City_nr..Windsor`                                         <dbl> …
## $ `Village,_Town_or_City_Peebles`                                             <dbl> …
## $ `Village,_Town_or_City_Peterborough`                                        <dbl> …
## $ `Village,_Town_or_City_Porthmadog`                                          <dbl> …
## $ `Village,_Town_or_City_Ruddington`                                          <dbl> …
## $ `Village,_Town_or_City_Saltburn`                                            <dbl> …
## $ `Village,_Town_or_City_Sheffield`                                           <dbl> …
## $ `Village,_Town_or_City_Shildon`                                             <dbl> …
## $ `Village,_Town_or_City_Shrewsbury`                                          <dbl> …
## $ `Village,_Town_or_City_Spalding`                                            <dbl> …
## $ `Village,_Town_or_City_St.Andrews`                                          <dbl> …
## $ `Village,_Town_or_City_Stafford`                                            <dbl> …
## $ `Village,_Town_or_City_Stonehaven`                                          <dbl> …
## $ `Village,_Town_or_City_Swanage`                                             <dbl> …
## $ `Village,_Town_or_City_Trowbridge`                                          <dbl> …
## $ `Village,_Town_or_City_Winchester`                                          <dbl> …
## $ `Village,_Town_or_City_Wolverhampton`                                       <dbl> …
## $ `Village,_Town_or_City_Wrexham`                                             <dbl> …
## $ `Village,_Town_or_City_Wymondham`                                           <dbl> …
## $ Postcode_AB9.2TN                                                            <dbl> …
## $ Postcode_B15.2RT                                                            <dbl> …
## $ Postcode_BA14.8AT                                                           <dbl> …
## $ Postcode_BH19.3HZ                                                           <dbl> …
## $ Postcode_BL1.8UA                                                            <dbl> …
## $ Postcode_BN18.9LT                                                           <dbl> …
## $ Postcode_BN9.0DH                                                            <dbl> …
## $ Postcode_CB9.8AR                                                            <dbl> …
## $ Postcode_CM9.4PZ                                                            <dbl> …
## $ Postcode_CM9.5HX                                                            <dbl> …
## $ Postcode_CO4.3SQ                                                            <dbl> …
## $ Postcode_DA12.1BE                                                           <dbl> …
## $ Postcode_DA5.1PQ                                                            <dbl> …
## $ Postcode_DD11.1PN                                                           <dbl> …
## $ Postcode_DG12.6TF                                                           <dbl> …
## $ Postcode_DL4.1PQ                                                            <dbl> …
## $ Postcode_DN1.2AE                                                            <dbl> …
## $ Postcode_DN4.9EY                                                            <dbl> …
## $ Postcode_DT1.1RN                                                            <dbl> …
## $ Postcode_EH45.8AG                                                           <dbl> …
## $ Postcode_EH8.8BN                                                            <dbl> …
## $ Postcode_EH8.9LJ                                                            <dbl> …
## $ Postcode_EX33.1AA                                                           <dbl> …
## $ Postcode_GL14.2UB                                                           <dbl> …
## $ Postcode_GU12.5DP                                                           <dbl> …
## $ Postcode_GU12.5RQ                                                           <dbl> …
## $ Postcode_HD4.6TB                                                            <dbl> …
## $ Postcode_HP8.4AB                                                            <dbl> …
## $ Postcode_HS2.0TG                                                            <dbl> …
## $ Postcode_HS2.9AL                                                            <dbl> …
## $ Postcode_HU6.7RX                                                            <dbl> …
## $ Postcode_IP15.5DS                                                           <dbl> …
## $ Postcode_IV15.9RY                                                           <dbl> …
## $ Postcode_IV55.8ZY                                                           <dbl> …
## $ Postcode_KY16.9TS                                                           <dbl> …
## $ Postcode_L3.8EN                                                             <dbl> …
## $ Postcode_LE1.7EA                                                            <dbl> …
## $ Postcode_LL11.3DU                                                           <dbl> …
## $ Postcode_LL49.9LU                                                           <dbl> …
## $ Postcode_LN5.0EP                                                            <dbl> …
## $ Postcode_LS6.3BR                                                            <dbl> …
## $ Postcode_ME4.4TE                                                            <dbl> …
## $ Postcode_MK8.9LA                                                            <dbl> …
## $ Postcode_NE10.8YY                                                           <dbl> …
## $ Postcode_NE32.3DY                                                           <dbl> …
## $ Postcode_NG11.6HA                                                           <dbl> …
## $ Postcode_NG8.2AE                                                            <dbl> …
## $ Postcode_NN11.4BH                                                           <dbl> …
## $ Postcode_NR18.0NS                                                           <dbl> …
## $ Postcode_NW9.5HF                                                            <dbl> …
## $ Postcode_PA29.6XD                                                           <dbl> …
## $ Postcode_PE11.2TD                                                           <dbl> …
## $ Postcode_PE11.3XY                                                           <dbl> …
## $ Postcode_PE30.5EG                                                           <dbl> …
## $ Postcode_PE8.4BW                                                            <dbl> …
## $ Postcode_PL15.8DA                                                           <dbl> …
## $ Postcode_S11.7EP                                                            <dbl> …
## $ Postcode_S3.8RY                                                             <dbl> …
## $ Postcode_S8.9BE                                                             <dbl> …
## $ Postcode_SA43.2PG                                                           <dbl> …
## $ Postcode_SA48.7BB                                                           <dbl> …
## $ Postcode_SK22.3BN                                                           <dbl> …
## $ Postcode_SL4.6QY                                                            <dbl> …
## $ Postcode_SN11.0NF                                                           <dbl> …
## $ Postcode_SN15.4XX                                                           <dbl> …
## $ Postcode_SO23.8TS                                                           <dbl> …
## $ Postcode_ST17.0XB                                                           <dbl> …
## $ Postcode_SY1.1QH                                                            <dbl> …
## $ Postcode_SY16.2BB                                                           <dbl> …
## $ Postcode_TR8.4JN                                                            <dbl> …
## $ Postcode_TS12.3BZ                                                           <dbl> …
## $ Postcode_W2.1NY                                                             <dbl> …
## $ Postcode_WV14.7LU                                                           <dbl> …
## $ Governance_Government.Local_Authority                                       <dbl> …
## $ Governance_Government.National                                              <dbl> …
## $ Governance_Government.Other                                                 <dbl> …
## $ Governance_Independent.English_Heritage                                     <dbl> …
## $ Governance_Independent.Historic_Environment_Scotland                        <dbl> …
## $ Governance_Independent.National_Trust                                       <dbl> …
## $ Governance_Independent.National_Trust_for_Scotland                          <dbl> …
## $ Governance_Independent.Not_for_profit                                       <dbl> …
## $ Governance_Independent.Private                                              <dbl> …
## $ Governance_Independent.Unknown                                              <dbl> …
## $ Governance_University                                                       <dbl> …
## $ Governance_Unknown                                                          <dbl> …
## $ Size_large                                                                  <dbl> …
## $ Size_medium                                                                 <dbl> …
## $ Size_small                                                                  <dbl> …
## $ Size_unknown                                                                <dbl> …
## $ Subject_Matter_Archaeology.Medieval                                         <dbl> …
## $ Subject_Matter_Archaeology.Mixed                                            <dbl> …
## $ Subject_Matter_Archaeology.Other                                            <dbl> …
## $ Subject_Matter_Archaeology.Prehistory                                       <dbl> …
## $ Subject_Matter_Archaeology.Roman                                            <dbl> …
## $ Subject_Matter_Arts.Ceramics                                                <dbl> …
## $ Subject_Matter_Arts.Costume_and_textiles                                    <dbl> …
## $ Subject_Matter_Arts.Crafts                                                  <dbl> …
## $ Subject_Matter_Arts.Design                                                  <dbl> …
## $ Subject_Matter_Arts.Fine_and_decorative_arts                                <dbl> …
## $ Subject_Matter_Arts.Glass                                                   <dbl> …
## $ Subject_Matter_Arts.Literature                                              <dbl> …
## $ Subject_Matter_Arts.Music                                                   <dbl> …
## $ Subject_Matter_Arts.Other                                                   <dbl> …
## $ Subject_Matter_Arts.Photography                                             <dbl> …
## $ Subject_Matter_Belief_and_identity.Church_treasuries                        <dbl> …
## $ Subject_Matter_Belief_and_identity.Ethnic_group                             <dbl> …
## $ Subject_Matter_Belief_and_identity.Freemasons                               <dbl> …
## $ Subject_Matter_Belief_and_identity.Other                                    <dbl> …
## $ Subject_Matter_Belief_and_identity.Religion                                 <dbl> …
## $ Subject_Matter_Belief_and_identity.Religious_buildings                      <dbl> …
## $ Subject_Matter_Buildings.Civic                                              <dbl> …
## $ Subject_Matter_Buildings.Houses.Large_houses                                <dbl> …
## $ Subject_Matter_Buildings.Houses.Medium_houses                               <dbl> …
## $ Subject_Matter_Buildings.Houses.Small_houses                                <dbl> …
## $ Subject_Matter_Buildings.Other                                              <dbl> …
## $ Subject_Matter_Buildings.Penal                                              <dbl> …
## $ Subject_Matter_Communications                                               <dbl> …
## $ Subject_Matter_Communications.Post                                          <dbl> …
## $ Subject_Matter_Communications.Radio                                         <dbl> …
## $ Subject_Matter_Food_and_drink                                               <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Clocks_and_watches                  <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Industrial_life                     <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Metals                              <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Mining_and_quarrying                <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Mixed                               <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Other                               <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Potteries                           <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Steam_and_engines                   <dbl> …
## $ Subject_Matter_Industry_and_manufacture.Textiles                            <dbl> …
## $ Subject_Matter_Leisure_and_sport.Cricket                                    <dbl> …
## $ Subject_Matter_Leisure_and_sport.Fairgrounds_and_amusements                 <dbl> …
## $ Subject_Matter_Leisure_and_sport.Film_Cinema_and_TV                         <dbl> …
## $ Subject_Matter_Leisure_and_sport.Other                                      <dbl> …
## $ Subject_Matter_Leisure_and_sport.Rugby_and_football                         <dbl> …
## $ Subject_Matter_Leisure_and_sport.Toys_and_models                            <dbl> …
## $ Subject_Matter_Local_Histories                                              <dbl> …
## $ Subject_Matter_Medicine_and_health.Hospital                                 <dbl> …
## $ Subject_Matter_Medicine_and_health.Other                                    <dbl> …
## $ Subject_Matter_Medicine_and_health.Professional_association                 <dbl> …
## $ Subject_Matter_Mixed.Bygones                                                <dbl> …
## $ Subject_Matter_Mixed.Encyclopaedic                                          <dbl> …
## $ Subject_Matter_Mixed.Other                                                  <dbl> …
## $ Subject_Matter_Natural_world                                                <dbl> …
## $ Subject_Matter_Natural_world.Dinosaurs                                      <dbl> …
## $ Subject_Matter_Natural_world.Geology                                        <dbl> …
## $ Subject_Matter_Natural_world.Herbaria_and_gardening                         <dbl> …
## $ Subject_Matter_Natural_world.Mixed                                          <dbl> …
## $ Subject_Matter_Natural_world.Zoology                                        <dbl> …
## $ Subject_Matter_Other                                                        <dbl> …
## $ Subject_Matter_Personality.Art                                              <dbl> …
## $ Subject_Matter_Personality.Explorer                                         <dbl> …
## $ Subject_Matter_Personality.Literary                                         <dbl> …
## $ Subject_Matter_Personality.Music                                            <dbl> …
## $ Subject_Matter_Personality.Other                                            <dbl> …
## $ Subject_Matter_Personality.Political                                        <dbl> …
## $ Subject_Matter_Personality.Religious                                        <dbl> …
## $ Subject_Matter_Personality.Scientific                                       <dbl> …
## $ Subject_Matter_Rural_Industry.Farming                                       <dbl> …
## $ Subject_Matter_Rural_Industry.Forges                                        <dbl> …
## $ Subject_Matter_Rural_Industry.Other                                         <dbl> …
## $ Subject_Matter_Rural_Industry.Rural_life                                    <dbl> …
## $ Subject_Matter_Rural_Industry.Textiles                                      <dbl> …
## $ Subject_Matter_Rural_Industry.Watermills                                    <dbl> …
## $ Subject_Matter_Rural_Industry.Windmills                                     <dbl> …
## $ Subject_Matter_Science_and_technology.Computing_and_gaming                  <dbl> …
## $ Subject_Matter_Science_and_technology.Other                                 <dbl> …
## $ Subject_Matter_Sea_and_seafaring.Boats_and_ships                            <dbl> …
## $ Subject_Matter_Sea_and_seafaring.Fishing                                    <dbl> …
## $ Subject_Matter_Sea_and_seafaring.Lighthouses                                <dbl> …
## $ Subject_Matter_Sea_and_seafaring.Mixed                                      <dbl> …
## $ Subject_Matter_Sea_and_seafaring.Other                                      <dbl> …
## $ Subject_Matter_Services.Fire                                                <dbl> …
## $ Subject_Matter_Services.Other                                               <dbl> …
## $ Subject_Matter_Services.Police                                              <dbl> …
## $ Subject_Matter_Transport.Aviation                                           <dbl> …
## $ Subject_Matter_Transport.Bicycles                                           <dbl> …
## $ Subject_Matter_Transport.Buses_and_trams                                    <dbl> …
## $ Subject_Matter_Transport.Canals                                             <dbl> …
## $ Subject_Matter_Transport.Cars_and_motorbikes                                <dbl> …
## $ Subject_Matter_Transport.Mixed                                              <dbl> …
## $ Subject_Matter_Transport.Other                                              <dbl> …
## $ Subject_Matter_Transport.Trains_and_railways                                <dbl> …
## $ Subject_Matter_Utilities.Water_and_waste                                    <dbl> …
## $ Subject_Matter_War_and_conflict.Airforce                                    <dbl> …
## $ Subject_Matter_War_and_conflict.Bunker                                      <dbl> …
## $ Subject_Matter_War_and_conflict.Castles_and_forts                           <dbl> …
## $ Subject_Matter_War_and_conflict.Event_or_site                               <dbl> …
## $ Subject_Matter_War_and_conflict.Military                                    <dbl> …
## $ Subject_Matter_War_and_conflict.Navy                                        <dbl> …
## $ Subject_Matter_War_and_conflict.Other                                       <dbl> …
## $ Subject_Matter_War_and_conflict.Regiment                                    <dbl> …
## $ Year_opened_X1845.1845                                                      <dbl> …
## $ Year_opened_X1849.1849                                                      <dbl> …
## $ Year_opened_X1898.1898                                                      <dbl> …
## $ Year_opened_X1901.1901                                                      <dbl> …
## $ Year_opened_X1911.1911                                                      <dbl> …
## $ Year_opened_X1913.1913                                                      <dbl> …
## $ Year_opened_X1921.1921                                                      <dbl> …
## $ Year_opened_X1927.1927                                                      <dbl> …
## $ Year_opened_X1928.1928                                                      <dbl> …
## $ Year_opened_X1932.1932                                                      <dbl> …
## $ Year_opened_X1937.1937                                                      <dbl> …
## $ Year_opened_X1945.2017                                                      <dbl> …
## $ Year_opened_X1957.1957                                                      <dbl> …
## $ Year_opened_X1958.1958                                                      <dbl> …
## $ Year_opened_X1960.2017                                                      <dbl> …
## $ Year_opened_X1962.1962                                                      <dbl> …
## $ Year_opened_X1963.1963                                                      <dbl> …
## $ Year_opened_X1964.1964                                                      <dbl> …
## $ Year_opened_X1968.1968                                                      <dbl> …
## $ Year_opened_X1971.1971                                                      <dbl> …
## $ Year_opened_X1972.1972                                                      <dbl> …
## $ Year_opened_X1974.1974                                                      <dbl> …
## $ Year_opened_X1975.1975                                                      <dbl> …
## $ Year_opened_X1976.1976                                                      <dbl> …
## $ Year_opened_X1979.1979                                                      <dbl> …
## $ Year_opened_X1980.1980                                                      <dbl> …
## $ Year_opened_X1981.1981                                                      <dbl> …
## $ Year_opened_X1982.1982                                                      <dbl> …
## $ Year_opened_X1983.1983                                                      <dbl> …
## $ Year_opened_X1984.1984                                                      <dbl> …
## $ Year_opened_X1985.1985                                                      <dbl> …
## $ Year_opened_X1986.1986                                                      <dbl> …
## $ Year_opened_X1989.1989                                                      <dbl> …
## $ Year_opened_X1990.1990                                                      <dbl> …
## $ Year_opened_X1993.1993                                                      <dbl> …
## $ Year_opened_X1994.1994                                                      <dbl> …
## $ Year_opened_X1995.1995                                                      <dbl> …
## $ Year_opened_X1996.1996                                                      <dbl> …
## $ Year_opened_X1998.1998                                                      <dbl> …
## $ Year_opened_X1999.1999                                                      <dbl> …
## $ Year_opened_X2003.2003                                                      <dbl> …
## $ Year_opened_X2005.2005                                                      <dbl> …
## $ Year_opened_X2006.2006                                                      <dbl> …
## $ Year_opened_X2008.2008                                                      <dbl> …
## $ Year_opened_X2009.2009                                                      <dbl> …
## $ Year_opened_X2010.2010                                                      <dbl> …
## $ Year_opened_X2011.2011                                                      <dbl> …
## $ Year_opened_X2012.2012                                                      <dbl> …
## $ Year_opened_X2013.2013                                                      <dbl> …
## $ Year_opened_X2014.2014                                                      <dbl> …
## $ Area_Deprivation_index_X2                                                   <dbl> …
## $ Area_Deprivation_index_X3                                                   <dbl> …
## $ Area_Deprivation_index_X4                                                   <dbl> …
## $ Area_Deprivation_index_X5                                                   <dbl> …
## $ Area_Deprivation_index_X6                                                   <dbl> …
## $ Area_Deprivation_index_X7                                                   <dbl> …
## $ Area_Deprivation_index_X8                                                   <dbl> …
## $ Area_Deprivation_index_X9                                                   <dbl> …
## $ Area_Deprivation_index_X10                                                  <dbl> …
## $ Area_Deprivation_index_crime_X2                                             <dbl> …
## $ Area_Deprivation_index_crime_X3                                             <dbl> …
## $ Area_Deprivation_index_crime_X4                                             <dbl> …
## $ Area_Deprivation_index_crime_X5                                             <dbl> …
## $ Area_Deprivation_index_crime_X6                                             <dbl> …
## $ Area_Deprivation_index_crime_X7                                             <dbl> …
## $ Area_Deprivation_index_crime_X8                                             <dbl> …
## $ Area_Deprivation_index_crime_X9                                             <dbl> …
## $ Area_Deprivation_index_crime_X10                                            <dbl> …
## $ Area_Deprivation_index_education_X2                                         <dbl> …
## $ Area_Deprivation_index_education_X3                                         <dbl> …
## $ Area_Deprivation_index_education_X4                                         <dbl> …
## $ Area_Deprivation_index_education_X5                                         <dbl> …
## $ Area_Deprivation_index_education_X6                                         <dbl> …
## $ Area_Deprivation_index_education_X7                                         <dbl> …
## $ Area_Deprivation_index_education_X8                                         <dbl> …
## $ Area_Deprivation_index_education_X9                                         <dbl> …
## $ Area_Deprivation_index_education_X10                                        <dbl> …
## $ Area_Deprivation_index_employment_X2                                        <dbl> …
## $ Area_Deprivation_index_employment_X3                                        <dbl> …
## $ Area_Deprivation_index_employment_X4                                        <dbl> …
## $ Area_Deprivation_index_employment_X5                                        <dbl> …
## $ Area_Deprivation_index_employment_X6                                        <dbl> …
## $ Area_Deprivation_index_employment_X7                                        <dbl> …
## $ Area_Deprivation_index_employment_X8                                        <dbl> …
## $ Area_Deprivation_index_employment_X9                                        <dbl> …
## $ Area_Deprivation_index_employment_X10                                       <dbl> …
## $ Area_Deprivation_index_health_X2                                            <dbl> …
## $ Area_Deprivation_index_health_X3                                            <dbl> …
## $ Area_Deprivation_index_health_X4                                            <dbl> …
## $ Area_Deprivation_index_health_X5                                            <dbl> …
## $ Area_Deprivation_index_health_X6                                            <dbl> …
## $ Area_Deprivation_index_health_X7                                            <dbl> …
## $ Area_Deprivation_index_health_X8                                            <dbl> …
## $ Area_Deprivation_index_health_X9                                            <dbl> …
## $ Area_Deprivation_index_health_X10                                           <dbl> …
## $ Area_Deprivation_index_housing_X2                                           <dbl> …
## $ Area_Deprivation_index_housing_X3                                           <dbl> …
## $ Area_Deprivation_index_housing_X4                                           <dbl> …
## $ Area_Deprivation_index_housing_X5                                           <dbl> …
## $ Area_Deprivation_index_housing_X6                                           <dbl> …
## $ Area_Deprivation_index_housing_X7                                           <dbl> …
## $ Area_Deprivation_index_housing_X8                                           <dbl> …
## $ Area_Deprivation_index_housing_X9                                           <dbl> …
## $ Area_Deprivation_index_housing_X10                                          <dbl> …
## $ Area_Deprivation_index_income_X2                                            <dbl> …
## $ Area_Deprivation_index_income_X3                                            <dbl> …
## $ Area_Deprivation_index_income_X4                                            <dbl> …
## $ Area_Deprivation_index_income_X5                                            <dbl> …
## $ Area_Deprivation_index_income_X6                                            <dbl> …
## $ Area_Deprivation_index_income_X7                                            <dbl> …
## $ Area_Deprivation_index_income_X8                                            <dbl> …
## $ Area_Deprivation_index_income_X9                                            <dbl> …
## $ Area_Deprivation_index_income_X10                                           <dbl> …
## $ Area_Deprivation_index_services_X2                                          <dbl> …
## $ Area_Deprivation_index_services_X3                                          <dbl> …
## $ Area_Deprivation_index_services_X4                                          <dbl> …
## $ Area_Deprivation_index_services_X5                                          <dbl> …
## $ Area_Deprivation_index_services_X6                                          <dbl> …
## $ Area_Deprivation_index_services_X7                                          <dbl> …
## $ Area_Deprivation_index_services_X8                                          <dbl> …
## $ Area_Deprivation_index_services_X9                                          <dbl> …
## $ Area_Deprivation_index_services_X10                                         <dbl> …
## $ Area_Geodemographic_group_English.and.Welsh.Countryside                     <dbl> …
## $ Area_Geodemographic_group_Ethnically.Diverse.Metropolitan..Living           <dbl> …
## $ Area_Geodemographic_group_Ethnically.Diverse.Metropolitan.Living            <dbl> …
## $ Area_Geodemographic_group_Larger.Towns.and.Cities                           <dbl> …
## $ Area_Geodemographic_group_London.Cosmopolitan                               <dbl> …
## $ Area_Geodemographic_group_Manufacturing.Traits                              <dbl> …
## $ Area_Geodemographic_group_Northern.Ireland.Countryside                      <dbl> …
## $ Area_Geodemographic_group_Remoter.Coastal.Living                            <dbl> …
## $ Area_Geodemographic_group_Rural.Urban.Fringe                                <dbl> …
## $ Area_Geodemographic_group_Scottish.Countryside                              <dbl> …
## $ Area_Geodemographic_group_Scottish.Industrial.Heritage                      <dbl> …
## $ Area_Geodemographic_group_Services.Manufacturing.and.Mining.Legacy          <dbl> …
## $ Area_Geodemographic_group_Suburban.Traits                                   <dbl> …
## $ Area_Geodemographic_group_Thriving.Rural                                    <dbl> …
## $ Area_Geodemographic_group_Town.Living                                       <dbl> …
## $ Area_Geodemographic_group_University.Towns.and.Cities                       <dbl> …
## $ Area_Geodemographic_group_code_X1br                                         <dbl> …
## $ Area_Geodemographic_group_code_X2ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X2br                                         <dbl> …
## $ Area_Geodemographic_group_code_X3ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X3br                                         <dbl> …
## $ Area_Geodemographic_group_code_X3cr                                         <dbl> …
## $ Area_Geodemographic_group_code_X4ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X5ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X6ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X6br                                         <dbl> …
## $ Area_Geodemographic_group_code_X7ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X7br                                         <dbl> …
## $ Area_Geodemographic_group_code_X7cr                                         <dbl> …
## $ Area_Geodemographic_group_code_X8ar                                         <dbl> …
## $ Area_Geodemographic_group_code_X8br                                         <dbl> …

Specify model

# Boosted-tree classification model fit through the xgboost engine.
# Each argument set to tune() is a placeholder, not a fixed value; the
# actual values are chosen later during hyperparameter tuning.
xgboost_spec <-
  boost_tree(
    trees = tune(),
    min_n = tune(),
    tree_depth = tune(),
    learn_rate = tune(),
    loss_reduction = tune(),
    sample_size = tune()
  ) |>
  set_mode("classification") |>
  set_engine("xgboost")

# Bundle the preprocessing recipe and the model specification into a single
# workflow object so both are handled together in later steps.
xgboost_workflow <- workflow() |>
  add_recipe(xgboost_recipe) |>
  add_model(xgboost_spec)

Tune hyperparameters

# Register a doParallel backend (default settings) for parallel processing.
doParallel::registerDoParallel()

# Fix the random seed immediately before tuning so the run is repeatable.
set.seed(17375)

# Tune the workflow over the resamples in data_cv; grid = 5 requests five
# candidate hyperparameter combinations.
xgboost_tune <- xgboost_workflow |>
  tune_grid(
    resamples = data_cv,
    grid = 5
  )
## Warning: package 'xgboost' was built under R version 4.3.3