Import Data

museums <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv')
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skimr::skim(museums)
Data summary
Name museums
Number of rows 4191
Number of columns 35
_______________________
Column type frequency:
character 24
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
museum_id 0 1.00 8 15 0 4191 0
Name_of_museum 0 1.00 3 76 0 4190 0
Address_line_1 441 0.89 3 61 0 3212 0
Address_line_2 2816 0.33 3 39 0 1167 0
Village,_Town_or_City 4 1.00 3 24 0 1696 0
Postcode 0 1.00 6 9 0 3918 0
Admin_area 0 1.00 12 137 0 393 0
Accreditation 0 1.00 10 12 0 2 0
Governance 0 1.00 7 41 0 13 0
Size 0 1.00 4 7 0 5 0
Size_provenance 179 0.96 2 29 0 16 0
Subject_Matter 0 1.00 5 45 0 114 0
Year_opened 0 1.00 9 9 0 351 0
Year_closed 0 1.00 9 9 0 170 0
DOMUS_Subject_Matter 2788 0.33 5 27 0 21 0
Primary_provenance_of_data 0 1.00 3 8 0 18 0
Identifier_used_in_primary_data_source 2056 0.51 2 8 0 2134 0
Area_Geodemographic_group 49 0.99 11 40 0 17 0
Area_Geodemographic_group_code 49 0.99 3 3 0 16 0
Area_Geodemographic_subgroup 49 0.99 12 39 0 25 0
Area_Geodemographic_subgroup_code 49 0.99 4 4 0 24 0
Area_Geodemographic_supergroup 49 0.99 16 39 0 8 0
Area_Geodemographic_supergroup_code 49 0.99 2 2 0 8 0
Notes 2980 0.29 12 751 0 956 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Latitude 0 1.00 52.93 2.09 49.18 51.48 52.47 53.96 100.00 ▇▁▁▁▁
Longitude 0 1.00 -1.96 1.84 -8.09 -3.10 -1.87 -0.48 1.76 ▁▂▇▇▅
DOMUS_identifier 2347 0.44 1303.45 1597.19 1.00 486.50 991.50 1470.25 7746.00 ▇▂▁▁▁
Area_Deprivation_index 49 0.99 5.44 2.48 1.00 4.00 5.00 7.00 10.00 ▃▆▇▆▃
Area_Deprivation_index_crime 49 0.99 5.43 3.07 1.00 3.00 6.00 8.00 10.00 ▇▆▅▇▇
Area_Deprivation_index_education 49 0.99 6.04 2.61 1.00 4.00 6.00 8.00 10.00 ▃▅▇▇▆
Area_Deprivation_index_employment 49 0.99 6.08 2.76 1.00 4.00 6.00 8.00 10.00 ▅▆▇▇▇
Area_Deprivation_index_health 49 0.99 6.02 2.82 1.00 4.00 6.00 8.00 10.00 ▅▆▆▇▇
Area_Deprivation_index_housing 49 0.99 3.97 2.75 1.00 1.00 3.00 6.00 10.00 ▇▅▃▂▂
Area_Deprivation_index_income 49 0.99 5.99 2.62 1.00 4.00 6.00 8.00 10.00 ▃▆▇▇▆
Area_Deprivation_index_services 49 0.99 4.78 3.01 1.00 2.00 4.00 7.00 10.00 ▇▅▅▅▅

Clean Data

colnames(museums)[5] <- "Village_Town_or_City"

factors_vec <- museums %>% select(Name_of_museum, Village_Town_or_City, Accreditation, Governance, Subject_Matter) %>% names()

museums_clean <- museums %>%
  mutate(across(where(is.character), factor)) %>%
  
  separate_wider_delim(Year_opened, delim = ":", names = c("Year_opened", "Y_o_duplicate")) %>%
  
  select(-museum_id, -Latitude, -Longitude, -Notes, -Address_line_2, -Postcode, DOMUS_Subject_Matter, -DOMUS_identifier,-Identifier_used_in_primary_data_source, Address_line_1, -Size_provenance, Primary_provenance_of_data, -Y_o_duplicate, -Year_closed, -Area_Deprivation_index, Area_Deprivation_index_crime, Area_Deprivation_index_education, Area_Deprivation_index_employment, Area_Deprivation_index_health, Area_Deprivation_index_housing, Area_Deprivation_index_income, Area_Deprivation_index_services, -Area_Geodemographic_group, -Area_Geodemographic_group_code, Area_Geodemographic_subgroup, -Area_Geodemographic_subgroup_code, -Area_Geodemographic_supergroup, -Area_Geodemographic_supergroup_code, Village_Town_or_City) %>%
  na.omit()
  
  

museums_clean %>% glimpse()
## Rows: 1,370
## Columns: 19
## $ Name_of_museum                    <fct> "Prestongrange Industrial Heritage M…
## $ Address_line_1                    <fct> "Prestongrange", "Chambers Street", …
## $ Village_Town_or_City              <fct> "nr Prestonpans", "Edinburgh", "Cumn…
## $ Admin_area                        <fct> /Scotland/East Lothian (Scottish Cou…
## $ Accreditation                     <fct> Accredited, Accredited, Accredited, …
## $ Governance                        <fct> Government-Local_Authority, Governme…
## $ Size                              <fct> small, large, small, small, small, l…
## $ Subject_Matter                    <fct> Industry_and_manufacture-Mining_and_…
## $ Year_opened                       <chr> "1993", "1854", "1891", "1966", "198…
## $ DOMUS_Subject_Matter              <fct> science and industry, mixed collecti…
## $ Primary_provenance_of_data        <fct> domus, domus, domus, domus, domus, d…
## $ Area_Deprivation_index_crime      <dbl> 3, 1, 1, 7, 10, 1, 6, 4, 9, 4, 7, 3,…
## $ Area_Deprivation_index_education  <dbl> 7, 7, 4, 7, 6, 4, 3, 4, 8, 6, 7, 4, …
## $ Area_Deprivation_index_employment <dbl> 6, 6, 2, 7, 6, 4, 3, 6, 10, 4, 10, 4…
## $ Area_Deprivation_index_health     <dbl> 8, 5, 2, 9, 5, 5, 5, 6, 10, 2, 10, 4…
## $ Area_Deprivation_index_housing    <dbl> 9, 1, 6, 7, 7, 1, 10, 3, 2, 1, 10, 6…
## $ Area_Deprivation_index_income     <dbl> 5, 7, 3, 8, 6, 6, 3, 6, 10, 5, 10, 4…
## $ Area_Deprivation_index_services   <dbl> 3, 9, 9, 1, 1, 9, 9, 6, 1, 10, 2, 9,…
## $ Area_Geodemographic_subgroup      <fct> Scottish Industrial Legacy, Larger T…

Explore Data

museums_clean %>% count(Accreditation)
## # A tibble: 2 × 2
##   Accreditation     n
##   <fct>         <int>
## 1 Accredited     1007
## 2 Unaccredited    363
museums_clean %>% distinct(Subject_Matter) 
## # A tibble: 98 × 1
##    Subject_Matter                               
##    <fct>                                        
##  1 Industry_and_manufacture-Mining_and_quarrying
##  2 Mixed-Encyclopaedic                          
##  3 Local_Histories                              
##  4 War_and_conflict-Military                    
##  5 Personality-Explorer                         
##  6 Archaeology-Roman                            
##  7 Personality-Literary                         
##  8 Sea_and_seafaring-Fishing                    
##  9 Science_and_technology-Other                 
## 10 Sea_and_seafaring-Boats_and_ships            
## # ℹ 88 more rows
museums_clean %>% skimr::skim(Subject_Matter)
Data summary
Name Piped data
Number of rows 1370
Number of columns 19
_______________________
Column type frequency:
factor 1
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Subject_Matter 0 1 FALSE 98 Loc: 397, Art: 101, War: 95, Bui: 71
museums_clean %>% 
  ggplot(aes(Accreditation)) +
  geom_bar()

Accreditation vs. Governance

museums_clean %>%
  ggplot(aes(Accreditation, Governance)) +
  geom_boxplot()

Correlation Plot

# Step 1: Binarize

museums_binarized <- museums_clean %>%
  select(-Name_of_museum) %>%
  binarize()

museums_binarized %>% glimpse()
## Rows: 1,370
## Columns: 150
## $ Address_line_1__High_Street                                                                                                      <dbl> …
## $ `Address_line_1__-OTHER`                                                                                                         <dbl> …
## $ Village_Town_or_City__Edinburgh                                                                                                  <dbl> …
## $ Village_Town_or_City__Leeds                                                                                                      <dbl> …
## $ Village_Town_or_City__London                                                                                                     <dbl> …
## $ Village_Town_or_City__Manchester                                                                                                 <dbl> …
## $ Village_Town_or_City__York                                                                                                       <dbl> …
## $ `Village_Town_or_City__-OTHER`                                                                                                   <dbl> …
## $ `Admin_area__/England/London_(English_Region)/Camden_(London_Borough)`                                                           <dbl> …
## $ `Admin_area__/England/North_East_(English_Region)/North_East_(English_CA)/Northumberland_(English_District_or_Borough)`          <dbl> …
## $ `Admin_area__/England/North_West_(English_Region)/Greater_Manchester_(English_CA)/Manchester_(English_District_or_Borough)`      <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Cornwall_(English_UA)`                                                         <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Wiltshire_(English_UA)`                                                        <dbl> …
## $ `Admin_area__/England/Yorkshire_and_The_Humber_(English_Region)/West_Yorkshire_(English_CA)/Leeds_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/Yorkshire_and_The_Humber_(English_Region)/York_(English_UA)`                                               <dbl> …
## $ `Admin_area__/Scotland/City_of_Edinburgh_(Scottish_Council_Area)`                                                                <dbl> …
## $ `Admin_area__/Scotland/Dumfries_and_Galloway_(Scottish_Council_Area)`                                                            <dbl> …
## $ `Admin_area__/Scotland/Highland_(Scottish_Council_Area)`                                                                         <dbl> …
## $ `Admin_area__/Scotland/Scottish_Borders_(Scottish_Council_Area)`                                                                 <dbl> …
## $ `Admin_area__-OTHER`                                                                                                             <dbl> …
## $ Accreditation__Accredited                                                                                                        <dbl> …
## $ Accreditation__Unaccredited                                                                                                      <dbl> …
## $ `Governance__Government-Local_Authority`                                                                                         <dbl> …
## $ `Governance__Government-National`                                                                                                <dbl> …
## $ `Governance__Independent-English_Heritage`                                                                                       <dbl> …
## $ `Governance__Independent-National_Trust`                                                                                         <dbl> …
## $ `Governance__Independent-Not_for_profit`                                                                                         <dbl> …
## $ `Governance__Independent-Unknown`                                                                                                <dbl> …
## $ Governance__University                                                                                                           <dbl> …
## $ `Governance__-OTHER`                                                                                                             <dbl> …
## $ Size__large                                                                                                                      <dbl> …
## $ Size__medium                                                                                                                     <dbl> …
## $ Size__small                                                                                                                      <dbl> …
## $ `Size__-OTHER`                                                                                                                   <dbl> …
## $ `Subject_Matter__Archaeology-Roman`                                                                                              <dbl> …
## $ `Subject_Matter__Arts-Fine_and_decorative_arts`                                                                                  <dbl> …
## $ `Subject_Matter__Buildings-Houses-Large_houses`                                                                                  <dbl> …
## $ `Subject_Matter__Buildings-Houses-Medium_houses`                                                                                 <dbl> …
## $ `Subject_Matter__Buildings-Other`                                                                                                <dbl> …
## $ `Subject_Matter__Industry_and_manufacture-Mining_and_quarrying`                                                                  <dbl> …
## $ Subject_Matter__Local_Histories                                                                                                  <dbl> …
## $ `Subject_Matter__Mixed-Encyclopaedic`                                                                                            <dbl> …
## $ `Subject_Matter__Mixed-Other`                                                                                                    <dbl> …
## $ `Subject_Matter__Personality-Literary`                                                                                           <dbl> …
## $ `Subject_Matter__Sea_and_seafaring-Mixed`                                                                                        <dbl> …
## $ `Subject_Matter__Transport-Trains_and_railways`                                                                                  <dbl> …
## $ `Subject_Matter__War_and_conflict-Castles_and_forts`                                                                             <dbl> …
## $ `Subject_Matter__War_and_conflict-Regiment`                                                                                      <dbl> …
## $ `Subject_Matter__-OTHER`                                                                                                         <dbl> …
## $ Year_opened__1927                                                                                                                <dbl> …
## $ Year_opened__1951                                                                                                                <dbl> …
## $ Year_opened__1961                                                                                                                <dbl> …
## $ Year_opened__1962                                                                                                                <dbl> …
## $ Year_opened__1963                                                                                                                <dbl> …
## $ Year_opened__1964                                                                                                                <dbl> …
## $ Year_opened__1966                                                                                                                <dbl> …
## $ Year_opened__1967                                                                                                                <dbl> …
## $ Year_opened__1968                                                                                                                <dbl> …
## $ Year_opened__1969                                                                                                                <dbl> …
## $ Year_opened__1972                                                                                                                <dbl> …
## $ Year_opened__1973                                                                                                                <dbl> …
## $ Year_opened__1974                                                                                                                <dbl> …
## $ Year_opened__1975                                                                                                                <dbl> …
## $ Year_opened__1976                                                                                                                <dbl> …
## $ Year_opened__1977                                                                                                                <dbl> …
## $ Year_opened__1978                                                                                                                <dbl> …
## $ Year_opened__1979                                                                                                                <dbl> …
## $ Year_opened__1980                                                                                                                <dbl> …
## $ Year_opened__1981                                                                                                                <dbl> …
## $ Year_opened__1982                                                                                                                <dbl> …
## $ Year_opened__1983                                                                                                                <dbl> …
## $ Year_opened__1984                                                                                                                <dbl> …
## $ Year_opened__1985                                                                                                                <dbl> …
## $ Year_opened__1986                                                                                                                <dbl> …
## $ Year_opened__1987                                                                                                                <dbl> …
## $ Year_opened__1988                                                                                                                <dbl> …
## $ Year_opened__1989                                                                                                                <dbl> …
## $ Year_opened__1990                                                                                                                <dbl> …
## $ Year_opened__1991                                                                                                                <dbl> …
## $ Year_opened__1992                                                                                                                <dbl> …
## $ Year_opened__1993                                                                                                                <dbl> …
## $ Year_opened__1994                                                                                                                <dbl> …
## $ `Year_opened__-OTHER`                                                                                                            <dbl> …
## $ DOMUS_Subject_Matter__agriculture                                                                                                <dbl> …
## $ DOMUS_Subject_Matter__archaeology                                                                                                <dbl> …
## $ DOMUS_Subject_Matter__archives                                                                                                   <dbl> …
## $ DOMUS_Subject_Matter__biology_and_natural_history                                                                                <dbl> …
## $ DOMUS_Subject_Matter__costume_and_textiles                                                                                       <dbl> …
## $ DOMUS_Subject_Matter__decorative_and_applied_arts                                                                                <dbl> …
## $ DOMUS_Subject_Matter__fine_art                                                                                                   <dbl> …
## $ DOMUS_Subject_Matter__maritime                                                                                                   <dbl> …
## $ DOMUS_Subject_Matter__military                                                                                                   <dbl> …
## $ DOMUS_Subject_Matter__mixed_collection                                                                                           <dbl> …
## $ DOMUS_Subject_Matter__personalia                                                                                                 <dbl> …
## $ DOMUS_Subject_Matter__science_and_industry                                                                                       <dbl> …
## $ DOMUS_Subject_Matter__social_history                                                                                             <dbl> …
## $ DOMUS_Subject_Matter__transport                                                                                                  <dbl> …
## $ `DOMUS_Subject_Matter__-OTHER`                                                                                                   <dbl> …
## $ Primary_provenance_of_data__domus                                                                                                <dbl> …
## $ `Primary_provenance_of_data__-OTHER`                                                                                             <dbl> …
## $ `Area_Deprivation_index_crime__-Inf_2`                                                                                           <dbl> …
## $ Area_Deprivation_index_crime__2_4                                                                                                <dbl> …
## $ Area_Deprivation_index_crime__4_7                                                                                                <dbl> …
## $ Area_Deprivation_index_crime__7_Inf                                                                                              <dbl> …
## $ `Area_Deprivation_index_education__-Inf_4`                                                                                       <dbl> …
## $ Area_Deprivation_index_education__4_6                                                                                            <dbl> …
## $ Area_Deprivation_index_education__6_8                                                                                            <dbl> …
## $ Area_Deprivation_index_education__8_Inf                                                                                          <dbl> …
## $ `Area_Deprivation_index_employment__-Inf_3`                                                                                      <dbl> …
## $ Area_Deprivation_index_employment__3_6                                                                                           <dbl> …
## $ Area_Deprivation_index_employment__6_8                                                                                           <dbl> …
## $ Area_Deprivation_index_employment__8_Inf                                                                                         <dbl> …
## $ `Area_Deprivation_index_health__-Inf_3`                                                                                          <dbl> …
## $ Area_Deprivation_index_health__3_5                                                                                               <dbl> …
## $ Area_Deprivation_index_health__5_8                                                                                               <dbl> …
## $ Area_Deprivation_index_health__8_Inf                                                                                             <dbl> …
## $ `Area_Deprivation_index_housing__-Inf_3`                                                                                         <dbl> …
## $ Area_Deprivation_index_housing__3_6                                                                                              <dbl> …
## $ Area_Deprivation_index_housing__6_Inf                                                                                            <dbl> …
## $ `Area_Deprivation_index_income__-Inf_4`                                                                                          <dbl> …
## $ Area_Deprivation_index_income__4_6                                                                                               <dbl> …
## $ Area_Deprivation_index_income__6_8                                                                                               <dbl> …
## $ Area_Deprivation_index_income__8_Inf                                                                                             <dbl> …
## $ `Area_Deprivation_index_services__-Inf_3`                                                                                        <dbl> …
## $ Area_Deprivation_index_services__3_5                                                                                             <dbl> …
## $ Area_Deprivation_index_services__5_8                                                                                             <dbl> …
## $ Area_Deprivation_index_services__8_Inf                                                                                           <dbl> …
## $ Area_Geodemographic_subgroup__Affluent_rural                                                                                     <dbl> …
## $ Area_Geodemographic_subgroup__Ageing_Coastal_Living                                                                              <dbl> …
## $ Area_Geodemographic_subgroup__Country_Living                                                                                     <dbl> …
## $ Area_Geodemographic_subgroup__Ethnically_Diverse_Metropolitan_Living                                                             <dbl> …
## $ Area_Geodemographic_subgroup__Expanding_Areas                                                                                    <dbl> …
## $ `Area_Geodemographic_subgroup__Industrial_and_Multi-ethnic`                                                                      <dbl> …
## $ Area_Geodemographic_subgroup__Larger_Towns_and_Cities                                                                            <dbl> …
## $ Area_Geodemographic_subgroup__London_Cosmopolitan                                                                                <dbl> …
## $ Area_Geodemographic_subgroup__Manufacturing_Legacy                                                                               <dbl> …
## $ Area_Geodemographic_subgroup__Mining_Legacy                                                                                      <dbl> …
## $ Area_Geodemographic_subgroup__Northern_Ireland_Countryside                                                                       <dbl> …
## $ Area_Geodemographic_subgroup__Older_Farming_Communities                                                                          <dbl> …
## $ Area_Geodemographic_subgroup__Prosperous_Towns                                                                                   <dbl> …
## $ `Area_Geodemographic_subgroup__Rural-Urban_Fringe`                                                                               <dbl> …
## $ Area_Geodemographic_subgroup__Rural_Growth_Areas                                                                                 <dbl> …
## $ Area_Geodemographic_subgroup__Scottish_Countryside                                                                               <dbl> …
## $ Area_Geodemographic_subgroup__Scottish_Industrial_Legacy                                                                         <dbl> …
## $ Area_Geodemographic_subgroup__Seaside_Living                                                                                     <dbl> …
## $ Area_Geodemographic_subgroup__Service_Economy                                                                                    <dbl> …
## $ Area_Geodemographic_subgroup__Sparse_English_and_Welsh_Countryside                                                               <dbl> …
## $ Area_Geodemographic_subgroup__University_Towns_and_Cities                                                                        <dbl> …
## $ Area_Geodemographic_subgroup__Urban_Living                                                                                       <dbl> …
## $ `Area_Geodemographic_subgroup__-OTHER`                                                                                           <dbl> …
# Step 2: Correlation
museums_correlation <-  museums_binarized %>%
  correlate(Accreditation__Accredited)

museums_correlation %>% glimpse()
## Rows: 150
## Columns: 3
## $ feature     <fct> Accreditation, Accreditation, Governance, Size, Year_opene…
## $ bin         <chr> "Accredited", "Unaccredited", "Independent-Unknown", "smal…
## $ correlation <dbl> 1.00000000, -1.00000000, -0.19751976, -0.15704734, 0.13432…
# Step 3: Plot
museums_correlation %>% 
  correlationfunnel::plot_correlation_funnel()
## Warning: ggrepel: 113 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Model Building

Split Data

set.seed(1234)
museums_clean <-  museums_clean %>% sample_n(100)

museums_split <- initial_split(museums_clean, strata = Accreditation)
museums_train <- training(museums_split)
museums_test <- testing(museums_split)

museums_cv <- rsample::vfold_cv(museums_train, strata = Accreditation)
museums_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [66/8]> Fold01
##  2 <split [66/8]> Fold02
##  3 <split [66/8]> Fold03
##  4 <split [66/8]> Fold04
##  5 <split [67/7]> Fold05
##  6 <split [67/7]> Fold06
##  7 <split [67/7]> Fold07
##  8 <split [67/7]> Fold08
##  9 <split [67/7]> Fold09
## 10 <split [67/7]> Fold10

Preprocess Data Using Recipes Package

library(themis)
## Warning: package 'themis' was built under R version 4.4.1
# Convert all nominal predictors (characters and factors) to numbers.
museums_rec <- recipes::recipe(Accreditation ~ ., data = museums_train) %>%
  update_role(Name_of_museum, new_role = "ID") %>% 
  step_other(all_nominal_predictors()) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_smote(Accreditation)
  


museums_rec %>% prep() %>% juice() %>% glimpse()
## Rows: 100
## Columns: 39
## $ Name_of_museum                                                    <fct> "Gro…
## $ Area_Deprivation_index_crime                                      <dbl> 8, 6…
## $ Area_Deprivation_index_education                                  <dbl> 8, 6…
## $ Area_Deprivation_index_employment                                 <dbl> 8, 5…
## $ Area_Deprivation_index_health                                     <dbl> 8, 6…
## $ Area_Deprivation_index_housing                                    <dbl> 5, 3…
## $ Area_Deprivation_index_income                                     <dbl> 8, 5…
## $ Area_Deprivation_index_services                                   <dbl> 1, 8…
## $ Accreditation                                                     <fct> Accr…
## $ Address_line_1_other                                              <dbl> 1, 1…
## $ Village_Town_or_City_other                                        <dbl> 1, 1…
## $ Admin_area_other                                                  <dbl> 1, 1…
## $ Governance_Government.National                                    <dbl> 0, 0…
## $ Governance_Independent.Not_for_profit                             <dbl> 1, 1…
## $ Governance_other                                                  <dbl> 0, 0…
## $ Size_medium                                                       <dbl> 0, 0…
## $ Size_small                                                        <dbl> 1, 1…
## $ Size_other                                                        <dbl> 0, 0…
## $ Subject_Matter_Local_Histories                                    <dbl> 0, 1…
## $ Subject_Matter_War_and_conflict.Regiment                          <dbl> 0, 0…
## $ Subject_Matter_other                                              <dbl> 1, 0…
## $ Year_opened_X1989                                                 <dbl> 1, 0…
## $ Year_opened_other                                                 <dbl> 0, 1…
## $ DOMUS_Subject_Matter_decorative.and.applied.arts                  <dbl> 0, 0…
## $ DOMUS_Subject_Matter_fine.art                                     <dbl> 0, 0…
## $ DOMUS_Subject_Matter_military                                     <dbl> 0, 0…
## $ DOMUS_Subject_Matter_mixed.collection                             <dbl> 0, 0…
## $ DOMUS_Subject_Matter_science.and.industry                         <dbl> 0, 0…
## $ DOMUS_Subject_Matter_social.history                               <dbl> 0, 0…
## $ DOMUS_Subject_Matter_other                                        <dbl> 1, 0…
## $ Primary_provenance_of_data_other                                  <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_Country.Living                       <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_Larger.Towns.and.Cities              <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_London.Cosmopolitan                  <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_Older.Farming.Communities            <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_Rural.Growth.Areas                   <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_Scottish.Countryside                 <dbl> 1, 0…
## $ Area_Geodemographic_subgroup_Sparse.English.and.Welsh.Countryside <dbl> 0, 0…
## $ Area_Geodemographic_subgroup_other                                <dbl> 0, 0…

Specify Model

library(usemodels)
## Warning: package 'usemodels' was built under R version 4.4.1
#usemodels::use_xgboost(Attrition ~ ., data = museums_train)

xgboost_spec <- 
  boost_tree(trees = tune()) %>% 
  set_mode("classification") %>% 
  set_engine("xgboost") 

xgboost_workflow <- 
  workflow() %>% 
  add_recipe(museums_rec) %>% 
  add_model(xgboost_spec) 

Tune Hyperparameters

doParallel::registerDoParallel()

set.seed(47927)
xgboost_tune <-
  tune_grid(xgboost_workflow, 
            resamples = museums_cv, 
            grid = 5)