Goal is to predict attrition, employees who are likely to leave the company.

Import Data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(correlationfunnel)
## ══ correlationfunnel Tip #3 ════════════════════════════════════════════════════
## Using `binarize()` with data containing many columns or many rows can increase dimensionality substantially.
## Try subsetting your data column-wise or row-wise to avoid creating too many columns.
## You can always make a big problem smaller by sampling. :)
library(dplyr)

data <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv')
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore data

skimr::skim(data)
Data summary
Name data
Number of rows 4191
Number of columns 35
_______________________
Column type frequency:
character 24
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
museum_id 0 1.00 8 15 0 4191 0
Name_of_museum 0 1.00 3 76 0 4190 0
Address_line_1 441 0.89 3 61 0 3212 0
Address_line_2 2816 0.33 3 39 0 1167 0
Village,_Town_or_City 4 1.00 3 24 0 1696 0
Postcode 0 1.00 6 9 0 3918 0
Admin_area 0 1.00 12 137 0 393 0
Accreditation 0 1.00 10 12 0 2 0
Governance 0 1.00 7 41 0 13 0
Size 0 1.00 4 7 0 5 0
Size_provenance 179 0.96 2 29 0 16 0
Subject_Matter 0 1.00 5 45 0 114 0
Year_opened 0 1.00 9 9 0 351 0
Year_closed 0 1.00 9 9 0 170 0
DOMUS_Subject_Matter 2788 0.33 5 27 0 21 0
Primary_provenance_of_data 0 1.00 3 8 0 18 0
Identifier_used_in_primary_data_source 2056 0.51 2 8 0 2134 0
Area_Geodemographic_group 49 0.99 11 40 0 17 0
Area_Geodemographic_group_code 49 0.99 3 3 0 16 0
Area_Geodemographic_subgroup 49 0.99 12 39 0 25 0
Area_Geodemographic_subgroup_code 49 0.99 4 4 0 24 0
Area_Geodemographic_supergroup 49 0.99 16 39 0 8 0
Area_Geodemographic_supergroup_code 49 0.99 2 2 0 8 0
Notes 2980 0.29 12 751 0 956 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Latitude 0 1.00 52.93 2.09 49.18 51.48 52.47 53.96 100.00 ▇▁▁▁▁
Longitude 0 1.00 -1.96 1.84 -8.09 -3.10 -1.87 -0.48 1.76 ▁▂▇▇▅
DOMUS_identifier 2347 0.44 1303.45 1597.19 1.00 486.50 991.50 1470.25 7746.00 ▇▂▁▁▁
Area_Deprivation_index 49 0.99 5.44 2.48 1.00 4.00 5.00 7.00 10.00 ▃▆▇▆▃
Area_Deprivation_index_crime 49 0.99 5.43 3.07 1.00 3.00 6.00 8.00 10.00 ▇▆▅▇▇
Area_Deprivation_index_education 49 0.99 6.04 2.61 1.00 4.00 6.00 8.00 10.00 ▃▅▇▇▆
Area_Deprivation_index_employment 49 0.99 6.08 2.76 1.00 4.00 6.00 8.00 10.00 ▅▆▇▇▇
Area_Deprivation_index_health 49 0.99 6.02 2.82 1.00 4.00 6.00 8.00 10.00 ▅▆▆▇▇
Area_Deprivation_index_housing 49 0.99 3.97 2.75 1.00 1.00 3.00 6.00 10.00 ▇▅▃▂▂
Area_Deprivation_index_income 49 0.99 5.99 2.62 1.00 4.00 6.00 8.00 10.00 ▃▆▇▇▆
Area_Deprivation_index_services 49 0.99 4.78 3.01 1.00 2.00 4.00 7.00 10.00 ▇▅▅▅▅

Issues with data * Missing values * Factors or numeric variables * Governance, Size, Subject_Matter, Area_Geodemographic_group, Area_Geodemographic_group_code, Area_Deprivation_index, Area_Deprivation_index_crime, Area_Deprivation_index_education, Area_Deprivation_index_employment, Area_Deprivation_index_health, Area_Deprivation_index_housing, Area_Deprivation_index_income, Area_Deprivation_index_services * Zero variance variables * Character variables: Convert them to numbers in recipe step * Unbalanced target variable: Accreditation * ID variable: museum_id

library(stringr)

data_clean <- data %>%
    # Remove unnecessary columns
    select(-c(Size_provenance, DOMUS_Subject_Matter, Year_closed, Primary_provenance_of_data, 
              Identifier_used_in_primary_data_source, Area_Geodemographic_subgroup, 
              Area_Geodemographic_subgroup_code, Area_Geodemographic_supergroup, 
              Area_Geodemographic_supergroup_code, Notes, Latitude, Longitude, DOMUS_identifier, 
              Address_line_2, Postcode, Admin_area, Area_Geodemographic_group, Name_of_museum)) %>%
    # Remove rows with missing values
    na.omit() %>%
    
    # Change column name and adjust Years opened
    rename("Village_Town_City" = "Village,_Town_or_City") %>%
    mutate(Year_opened = str_sub(Year_opened, 1, 4)) %>%
    
    # Convert selected columns to factors
    mutate(across(c(Accreditation, Governance, Size, Subject_Matter, 
                    Area_Geodemographic_group_code, Area_Deprivation_index, 
                    Area_Deprivation_index_crime, Area_Deprivation_index_education, 
                    Area_Deprivation_index_employment, Area_Deprivation_index_health, 
                    Area_Deprivation_index_housing, Area_Deprivation_index_income, 
                    Area_Deprivation_index_services), as.factor)) %>%
    # Recode Accreditation
    mutate(Accreditation = if_else(Accreditation == "Accredited", "Yes", "No"))

# Preview the cleaned data
glimpse(data_clean)
## Rows: 3,708
## Columns: 17
## $ museum_id                         <chr> "mm.New.1", "mm.domus.WM019", "mm.ai…
## $ Address_line_1                    <chr> "1 Olympic Way", "Warwick College of…
## $ Village_Town_City                 <chr> "Belfast", "Moreton Morrell", "Chelt…
## $ Accreditation                     <chr> "No", "No", "Yes", "No", "No", "Yes"…
## $ Governance                        <fct> Independent-Not_for_profit, Governme…
## $ Size                              <fct> large, medium, medium, small, small,…
## $ Subject_Matter                    <fct> Sea_and_seafaring-Boats_and_ships, R…
## $ Year_opened                       <chr> "2012", "1984", "2013", "1996", "198…
## $ Area_Deprivation_index            <fct> 2, 8, 8, 2, 6, 6, 5, 6, 3, 7, 5, 8, …
## $ Area_Deprivation_index_crime      <fct> 3, 9, 10, 1, 10, 3, 1, 10, 1, 7, 10,…
## $ Area_Deprivation_index_education  <fct> 1, 8, 7, 6, 8, 7, 7, 6, 4, 7, 6, 6, …
## $ Area_Deprivation_index_employment <fct> 2, 10, 7, 3, 7, 6, 6, 7, 2, 7, 6, 8,…
## $ Area_Deprivation_index_health     <fct> 1, 8, 8, 2, 7, 8, 5, 7, 2, 9, 5, 8, …
## $ Area_Deprivation_index_housing    <fct> 4, 5, 7, 1, 8, 9, 1, 7, 6, 7, 7, 5, …
## $ Area_Deprivation_index_income     <fct> 5, 8, 8, 3, 5, 5, 7, 5, 3, 8, 6, 8, …
## $ Area_Deprivation_index_services   <fct> 5, 1, 4, 4, 2, 3, 9, 1, 9, 1, 1, 2, …
## $ Area_Geodemographic_group_code    <fct> 2ar, 3ar, 7ar, 5ar, 3cr, 6br, 2ar, 3…

Explore data

Accredited vs. Unaccredited

data_clean %>% count(Accreditation)
## # A tibble: 2 × 2
##   Accreditation     n
##   <chr>         <int>
## 1 No             2040
## 2 Yes            1668
data_clean %>%
    ggplot(aes(Accreditation)) +
    geom_bar()

Most common Subjects

data_clean %>% count(Subject_Matter) %>% arrange(desc(n))
## # A tibble: 114 × 2
##    Subject_Matter                     n
##    <fct>                          <int>
##  1 Local_Histories                  853
##  2 Buildings-Houses-Large_houses    200
##  3 Arts-Fine_and_decorative_arts    186
##  4 War_and_conflict-Regiment        128
##  5 Transport-Trains_and_railways    127
##  6 Mixed-Encyclopaedic              115
##  7 Personality-Literary              80
##  8 Mixed-Other                       78
##  9 Other                             71
## 10 Buildings-Houses-Medium_houses    67
## # ℹ 104 more rows
data_clean %>%
    ggplot(aes(x= Subject_Matter)) +
    geom_bar()

Years museums opened

data_clean %>%
    ggplot(aes(Year_opened, Accreditation)) +
    geom_count()

correlation plot

# Step 1: binarize
data_binarized <- data_clean %>%
    select(-museum_id) %>%
    binarize()

data_binarized %>% glimpse()
## Rows: 3,708
## Columns: 175
## $ Address_line_1__High_Street                                     <dbl> 0, 0, …
## $ `Address_line_1__-OTHER`                                        <dbl> 1, 1, …
## $ Village_Town_City__Edinburgh                                    <dbl> 0, 0, …
## $ Village_Town_City__London                                       <dbl> 0, 0, …
## $ `Village_Town_City__-OTHER`                                     <dbl> 1, 1, …
## $ Accreditation__No                                               <dbl> 1, 1, …
## $ Accreditation__Yes                                              <dbl> 0, 0, …
## $ `Governance__Government-Local_Authority`                        <dbl> 0, 1, …
## $ `Governance__Government-National`                               <dbl> 0, 0, …
## $ `Governance__Independent-English_Heritage`                      <dbl> 0, 0, …
## $ `Governance__Independent-National_Trust`                        <dbl> 0, 0, …
## $ `Governance__Independent-Not_for_profit`                        <dbl> 1, 0, …
## $ `Governance__Independent-Private`                               <dbl> 0, 0, …
## $ `Governance__Independent-Unknown`                               <dbl> 0, 0, …
## $ Governance__University                                          <dbl> 0, 0, …
## $ Governance__Unknown                                             <dbl> 0, 0, …
## $ `Governance__-OTHER`                                            <dbl> 0, 0, …
## $ Size__large                                                     <dbl> 1, 0, …
## $ Size__medium                                                    <dbl> 0, 1, …
## $ Size__small                                                     <dbl> 0, 0, …
## $ Size__unknown                                                   <dbl> 0, 0, …
## $ `Size__-OTHER`                                                  <dbl> 0, 0, …
## $ `Subject_Matter__Archaeology-Roman`                             <dbl> 0, 0, …
## $ `Subject_Matter__Arts-Fine_and_decorative_arts`                 <dbl> 0, 0, …
## $ `Subject_Matter__Buildings-Houses-Large_houses`                 <dbl> 0, 0, …
## $ `Subject_Matter__Buildings-Houses-Medium_houses`                <dbl> 0, 0, …
## $ `Subject_Matter__Industry_and_manufacture-Mining_and_quarrying` <dbl> 0, 0, …
## $ `Subject_Matter__Leisure_and_sport-Toys_and_models`             <dbl> 0, 0, …
## $ Subject_Matter__Local_Histories                                 <dbl> 0, 0, …
## $ `Subject_Matter__Mixed-Encyclopaedic`                           <dbl> 0, 0, …
## $ `Subject_Matter__Mixed-Other`                                   <dbl> 0, 0, …
## $ Subject_Matter__Other                                           <dbl> 0, 0, …
## $ `Subject_Matter__Personality-Literary`                          <dbl> 0, 0, …
## $ `Subject_Matter__Rural_Industry-Farming`                        <dbl> 0, 1, …
## $ `Subject_Matter__Sea_and_seafaring-Boats_and_ships`             <dbl> 1, 0, …
## $ `Subject_Matter__Sea_and_seafaring-Mixed`                       <dbl> 0, 0, …
## $ `Subject_Matter__Transport-Cars_and_motorbikes`                 <dbl> 0, 0, …
## $ `Subject_Matter__Transport-Trains_and_railways`                 <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Airforce`                     <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Castles_and_forts`            <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Military`                     <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Regiment`                     <dbl> 0, 0, …
## $ `Subject_Matter__-OTHER`                                        <dbl> 0, 0, …
## $ Year_opened__1945                                               <dbl> 0, 0, …
## $ Year_opened__1960                                               <dbl> 0, 0, …
## $ Year_opened__1972                                               <dbl> 0, 0, …
## $ Year_opened__1973                                               <dbl> 0, 0, …
## $ Year_opened__1974                                               <dbl> 0, 0, …
## $ Year_opened__1975                                               <dbl> 0, 0, …
## $ Year_opened__1976                                               <dbl> 0, 0, …
## $ Year_opened__1977                                               <dbl> 0, 0, …
## $ Year_opened__1978                                               <dbl> 0, 0, …
## $ Year_opened__1979                                               <dbl> 0, 0, …
## $ Year_opened__1980                                               <dbl> 0, 0, …
## $ Year_opened__1981                                               <dbl> 0, 0, …
## $ Year_opened__1982                                               <dbl> 0, 0, …
## $ Year_opened__1983                                               <dbl> 0, 0, …
## $ Year_opened__1984                                               <dbl> 0, 1, …
## $ Year_opened__1985                                               <dbl> 0, 0, …
## $ Year_opened__1986                                               <dbl> 0, 0, …
## $ Year_opened__1987                                               <dbl> 0, 0, …
## $ Year_opened__1988                                               <dbl> 0, 0, …
## $ Year_opened__1989                                               <dbl> 0, 0, …
## $ Year_opened__1990                                               <dbl> 0, 0, …
## $ Year_opened__1991                                               <dbl> 0, 0, …
## $ Year_opened__1992                                               <dbl> 0, 0, …
## $ Year_opened__1993                                               <dbl> 0, 0, …
## $ Year_opened__1994                                               <dbl> 0, 0, …
## $ Year_opened__1995                                               <dbl> 0, 0, …
## $ Year_opened__1996                                               <dbl> 0, 0, …
## $ Year_opened__1997                                               <dbl> 0, 0, …
## $ Year_opened__1998                                               <dbl> 0, 0, …
## $ Year_opened__1999                                               <dbl> 0, 0, …
## $ Year_opened__2000                                               <dbl> 0, 0, …
## $ Year_opened__2002                                               <dbl> 0, 0, …
## $ Year_opened__2005                                               <dbl> 0, 0, …
## $ Year_opened__2012                                               <dbl> 1, 0, …
## $ Year_opened__2014                                               <dbl> 0, 0, …
## $ `Year_opened__-OTHER`                                           <dbl> 0, 0, …
## $ Area_Deprivation_index__1                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__2                                       <dbl> 1, 0, …
## $ Area_Deprivation_index__3                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__4                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__5                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__6                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__7                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__8                                       <dbl> 0, 1, …
## $ Area_Deprivation_index__9                                       <dbl> 0, 0, …
## $ Area_Deprivation_index__10                                      <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__1                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__2                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__3                                 <dbl> 1, 0, …
## $ Area_Deprivation_index_crime__4                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__5                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__6                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__7                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__8                                 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__9                                 <dbl> 0, 1, …
## $ Area_Deprivation_index_crime__10                                <dbl> 0, 0, …
## $ Area_Deprivation_index_education__1                             <dbl> 1, 0, …
## $ Area_Deprivation_index_education__2                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__3                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__4                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__5                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__6                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__7                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__8                             <dbl> 0, 1, …
## $ Area_Deprivation_index_education__9                             <dbl> 0, 0, …
## $ Area_Deprivation_index_education__10                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__1                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__2                            <dbl> 1, 0, …
## $ Area_Deprivation_index_employment__3                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__4                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__5                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__6                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__7                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__8                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__9                            <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__10                           <dbl> 0, 1, …
## $ Area_Deprivation_index_health__1                                <dbl> 1, 0, …
## $ Area_Deprivation_index_health__2                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__3                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__4                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__5                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__6                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__7                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__8                                <dbl> 0, 1, …
## $ Area_Deprivation_index_health__9                                <dbl> 0, 0, …
## $ Area_Deprivation_index_health__10                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__1                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__2                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__3                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__4                               <dbl> 1, 0, …
## $ Area_Deprivation_index_housing__5                               <dbl> 0, 1, …
## $ Area_Deprivation_index_housing__6                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__7                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__8                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__9                               <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__10                              <dbl> 0, 0, …
## $ Area_Deprivation_index_income__1                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__2                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__3                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__4                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__5                                <dbl> 1, 0, …
## $ Area_Deprivation_index_income__6                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__7                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__8                                <dbl> 0, 1, …
## $ Area_Deprivation_index_income__9                                <dbl> 0, 0, …
## $ Area_Deprivation_index_income__10                               <dbl> 0, 0, …
## $ Area_Deprivation_index_services__1                              <dbl> 0, 1, …
## $ Area_Deprivation_index_services__2                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__3                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__4                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__5                              <dbl> 1, 0, …
## $ Area_Deprivation_index_services__6                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__7                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__8                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__9                              <dbl> 0, 0, …
## $ Area_Deprivation_index_services__10                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__1ar                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__1br                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__2ar                             <dbl> 1, 0, …
## $ Area_Geodemographic_group_code__2br                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__3ar                             <dbl> 0, 1, …
## $ Area_Geodemographic_group_code__3br                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__3cr                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__4ar                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__5ar                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__6ar                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__6br                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__7ar                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__7br                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__7cr                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__8ar                             <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__8br                             <dbl> 0, 0, …
# Step 2: correlate
data_correlation <- data_binarized %>%
    correlate(Accreditation__Yes)

data_correlation
## # A tibble: 175 × 3
##    feature        bin                        correlation
##    <fct>          <chr>                            <dbl>
##  1 Accreditation  No                              -1    
##  2 Accreditation  Yes                              1    
##  3 Governance     Independent-Private             -0.353
##  4 Size           small                           -0.326
##  5 Size           medium                           0.253
##  6 Size           large                            0.242
##  7 Governance     Government-Local_Authority       0.221
##  8 Governance     Independent-Unknown             -0.202
##  9 Size           unknown                         -0.166
## 10 Subject_Matter -OTHER                          -0.147
## # ℹ 165 more rows
# Step 3: Plot
data_correlation %>%
    correlationfunnel::plot_correlation_funnel()
## Warning: ggrepel: 161 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Model Building

Split Data

library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom        1.0.5      ✔ rsample      1.2.1 
## ✔ dials        1.2.1      ✔ tune         1.2.1 
## ✔ infer        1.0.7      ✔ workflows    1.1.4 
## ✔ modeldata    1.4.0      ✔ workflowsets 1.1.0 
## ✔ parsnip      1.2.1      ✔ yardstick    1.3.1 
## ✔ recipes      1.0.10
## Warning: package 'modeldata' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
set.seed(1234)
data_clean <- data_clean %>% sample_n(1000)

data_split <- initial_split(data_clean, strata = Accreditation)
data_train <- training(data_split)
data_test <- testing(data_split)

data_cv <- rsample::vfold_cv(data_train, strata = Accreditation)
data_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits           id    
##    <list>           <chr> 
##  1 <split [674/75]> Fold01
##  2 <split [674/75]> Fold02
##  3 <split [674/75]> Fold03
##  4 <split [674/75]> Fold04
##  5 <split [674/75]> Fold05
##  6 <split [674/75]> Fold06
##  7 <split [674/75]> Fold07
##  8 <split [674/75]> Fold08
##  9 <split [674/75]> Fold09
## 10 <split [675/74]> Fold10

{r} # library(usemodels) # use_xgboost(like_count ~ ., data = data_train) #

Preprocess data

skimr::skim(data_clean)
Data summary
Name data_clean
Number of rows 1000
Number of columns 17
_______________________
Column type frequency:
character 5
factor 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
museum_id 0 1 8 15 0 1000 0
Address_line_1 0 1 4 52 0 925 0
Village_Town_City 0 1 3 24 0 618 0
Accreditation 0 1 2 3 0 2 0
Year_opened 0 1 4 4 0 161 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Governance 0 1 FALSE 12 Ind: 439, Gov: 251, Ind: 119, Ind: 40
Size 0 1 FALSE 5 sma: 553, med: 258, lar: 136, unk: 51
Subject_Matter 0 1 FALSE 108 Loc: 244, Art: 57, Bui: 50, War: 34
Area_Deprivation_index 0 1 FALSE 10 6: 133, 4: 132, 5: 122, 7: 114
Area_Deprivation_index_crime 0 1 FALSE 10 1: 162, 2: 118, 4: 102, 10: 99
Area_Deprivation_index_education 0 1 FALSE 10 7: 146, 5: 116, 8: 113, 6: 111
Area_Deprivation_index_employment 0 1 FALSE 10 10: 147, 5: 119, 7: 110, 4: 109
Area_Deprivation_index_health 0 1 FALSE 10 10: 126, 5: 123, 8: 116, 7: 106
Area_Deprivation_index_housing 0 1 FALSE 10 1: 261, 2: 150, 3: 127, 4: 86
Area_Deprivation_index_income 0 1 FALSE 10 6: 127, 5: 121, 4: 119, 7: 115
Area_Deprivation_index_services 0 1 FALSE 10 1: 166, 9: 107, 2: 106, 3: 99
Area_Geodemographic_group_code 0 1 FALSE 16 3ar: 164, 2ar: 106, 3br: 102, 7ar: 96
library(themis)
library(recipes)
library(textrecipes)

xgboost_recipe <- recipes::recipe(Accreditation ~ ., data = data_train) %>%
    update_role(museum_id, new_role = "ID") %>%
    step_tokenize(Village_Town_City) %>%
    step_tokenfilter(Village_Town_City, max_tokens = 50) %>%
    step_tf(Village_Town_City) %>%
    step_novel(all_nominal_predictors()) %>%
    step_dummy(all_nominal_predictors()) %>%
    step_smote(Accreditation)
    

xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## New names:
## • `Address_line_1_Units.1...2` -> `Address_line_1_Units.1`
## Rows: 818
## Columns: 1,135
## $ museum_id                                                          <fct> mm.…
## $ Accreditation                                                      <fct> No,…
## $ tf_Village_Town_City_aberdeen                                      <dbl> 0, …
## $ tf_Village_Town_City_anstruther                                    <dbl> 0, …
## $ tf_Village_Town_City_bath                                          <dbl> 0, …
## $ tf_Village_Town_City_belfast                                       <dbl> 0, …
## $ tf_Village_Town_City_birmingham                                    <dbl> 0, …
## $ tf_Village_Town_City_brighton                                      <dbl> 0, …
## $ tf_Village_Town_City_bristol                                       <dbl> 0, …
## $ tf_Village_Town_City_bury                                          <dbl> 0, …
## $ tf_Village_Town_City_by                                            <dbl> 0, …
## $ tf_Village_Town_City_cambridge                                     <dbl> 0, …
## $ tf_Village_Town_City_cardiff                                       <dbl> 0, …
## $ tf_Village_Town_City_chester                                       <dbl> 0, …
## $ tf_Village_Town_City_chichester                                    <dbl> 0, …
## $ tf_Village_Town_City_colchester                                    <dbl> 0, …
## $ tf_Village_Town_City_cowes                                         <dbl> 0, …
## $ tf_Village_Town_City_doncaster                                     <dbl> 0, …
## $ tf_Village_Town_City_dundee                                        <dbl> 0, …
## $ tf_Village_Town_City_east                                          <dbl> 0, …
## $ tf_Village_Town_City_eastbourne                                    <dbl> 0, …
## $ tf_Village_Town_City_edinburgh                                     <dbl> 0, …
## $ tf_Village_Town_City_edmunds                                       <dbl> 0, …
## $ tf_Village_Town_City_exeter                                        <dbl> 0, …
## $ tf_Village_Town_City_glasgow                                       <dbl> 0, …
## $ tf_Village_Town_City_isle                                          <dbl> 0, …
## $ tf_Village_Town_City_kings                                         <dbl> 0, …
## $ tf_Village_Town_City_lancaster                                     <dbl> 0, …
## $ tf_Village_Town_City_leeds                                         <dbl> 0, …
## $ tf_Village_Town_City_leicester                                     <dbl> 0, …
## $ tf_Village_Town_City_lerwick                                       <dbl> 0, …
## $ tf_Village_Town_City_liverpool                                     <dbl> 0, …
## $ tf_Village_Town_City_llangollen                                    <dbl> 0, …
## $ tf_Village_Town_City_london                                        <dbl> 0, …
## $ tf_Village_Town_City_lynn                                          <dbl> 0, …
## $ tf_Village_Town_City_manchester                                    <dbl> 0, …
## $ tf_Village_Town_City_milton                                        <dbl> 0, …
## $ tf_Village_Town_City_newcastle                                     <dbl> 0, …
## $ tf_Village_Town_City_newport                                       <dbl> 1, …
## $ tf_Village_Town_City_north                                         <dbl> 0, …
## $ tf_Village_Town_City_northampton                                   <dbl> 0, …
## $ tf_Village_Town_City_norwich                                       <dbl> 0, …
## $ tf_Village_Town_City_nr                                            <dbl> 0, …
## $ tf_Village_Town_City_of                                            <dbl> 0, …
## $ tf_Village_Town_City_on                                            <dbl> 0, …
## $ tf_Village_Town_City_sheffield                                     <dbl> 0, …
## $ tf_Village_Town_City_spalding                                      <dbl> 0, …
## $ tf_Village_Town_City_st                                            <dbl> 0, …
## $ tf_Village_Town_City_tyne                                          <dbl> 0, …
## $ tf_Village_Town_City_upon                                          <dbl> 0, …
## $ tf_Village_Town_City_wells                                         <dbl> 0, …
## $ tf_Village_Town_City_york                                          <dbl> 0, …
## $ Address_line_1_X1.Scala.St                                         <dbl> 0, …
## $ Address_line_1_X1.Senhouse.Street                                  <dbl> 0, …
## $ Address_line_1_X1.Warehouse                                        <dbl> 0, …
## $ Address_line_1_X1.West.Street                                      <dbl> 0, …
## $ Address_line_1_X1.3.Richmond.Road                                  <dbl> 0, …
## $ Address_line_1_X10.Buxton.Rd                                       <dbl> 0, …
## $ Address_line_1_X10.Stone.Buildings                                 <dbl> 0, …
## $ Address_line_1_X100.Church.Street                                  <dbl> 0, …
## $ Address_line_1_X100.London.Road                                    <dbl> 0, …
## $ Address_line_1_X100.100A.Hackford.Road                             <dbl> 0, …
## $ Address_line_1_X107.High.Street                                    <dbl> 0, …
## $ Address_line_1_X109.Head.Street                                    <dbl> 0, …
## $ Address_line_1_X109A.High.Street                                   <dbl> 0, …
## $ Address_line_1_X11.Spring.Hill                                     <dbl> 0, …
## $ Address_line_1_X12.Crooms.Hill                                     <dbl> 0, …
## $ Address_line_1_X12.13.New.Wharf.Road                               <dbl> 0, …
## $ Address_line_1_X126.High.Street                                    <dbl> 0, …
## $ Address_line_1_X129.Muir.Street                                    <dbl> 0, …
## $ Address_line_1_X13.Berkeley.Street                                 <dbl> 0, …
## $ Address_line_1_X13.High.Street                                     <dbl> 0, …
## $ Address_line_1_X134.Mount.Merrion.Avenue                           <dbl> 0, …
## $ Address_line_1_X136.Kingsland.Road                                 <dbl> 0, …
## $ Address_line_1_X13a.Laporte.Precinct                               <dbl> 0, …
## $ Address_line_1_X14.High.Street                                     <dbl> 0, …
## $ Address_line_1_X15.Lodge.Street                                    <dbl> 0, …
## $ Address_line_1_X16.Bridge.Street                                   <dbl> 0, …
## $ Address_line_1_X16.Rutland.Way                                     <dbl> 0, …
## $ Address_line_1_X16.18.Queen.Square                                 <dbl> 0, …
## $ Address_line_1_X169.High.Street                                    <dbl> 0, …
## $ Address_line_1_X17.Donegall.Square.North                           <dbl> 0, …
## $ Address_line_1_X17.18.The.Hard                                     <dbl> 0, …
## $ Address_line_1_X170.Salmon.Lane                                    <dbl> 0, …
## $ Address_line_1_X19.Church.Rd                                       <dbl> 0, …
## $ Address_line_1_X194.High.Street                                    <dbl> 0, …
## $ Address_line_1_X194.Odmaston.Road                                  <dbl> 0, …
## $ Address_line_1_X195.Aylestone.Road                                 <dbl> 0, …
## $ Address_line_1_X2.Boneybefore.Road                                 <dbl> 0, …
## $ Address_line_1_X2.Buddle.Street                                    <dbl> 0, …
## $ Address_line_1_X2.Carlisle.Road                                    <dbl> 0, …
## $ Address_line_1_X2.Castle.Street                                    <dbl> 0, …
## $ Address_line_1_X2.Old.Bank                                         <dbl> 0, …
## $ Address_line_1_X2.Victoria.Parade                                  <dbl> 0, …
## $ Address_line_1_X20.Cornfield.Terrace                               <dbl> 0, …
## $ Address_line_1_X20.Maresfield.Gardens                              <dbl> 0, …
## $ Address_line_1_X20.Princes.Gate                                    <dbl> 0, …
## $ Address_line_1_X21.South.Street                                    <dbl> 0, …
## $ Address_line_1_X217.Wakeham                                        <dbl> 0, …
## $ Address_line_1_X22.St.Georges.Road                                 <dbl> 0, …
## $ Address_line_1_X224.228.Great.Portland.Street                      <dbl> 0, …
## $ Address_line_1_X224.238.Kensington.High.Street                     <dbl> 0, …
## $ Address_line_1_X23.27.High.Street                                  <dbl> 0, …
## $ Address_line_1_X233.Bangor.Road                                    <dbl> 0, …
## $ Address_line_1_X24.Cheyne.Row                                      <dbl> 0, …
## $ Address_line_1_X24.Limes.Avenue                                    <dbl> 0, …
## $ Address_line_1_X24.Main.Street                                     <dbl> 0, …
## $ Address_line_1_X240.Swithland.Lane                                 <dbl> 0, …
## $ Address_line_1_X25.Brook.Street                                    <dbl> 0, …
## $ Address_line_1_X25.27.West.Street                                  <dbl> 0, …
## $ Address_line_1_X250c.King.Road.Arches                              <dbl> 0, …
## $ Address_line_1_X257.Castle.St                                      <dbl> 0, …
## $ Address_line_1_X268.Waldegrave.Road                                <dbl> 0, …
## $ Address_line_1_X27.Grosvenor.Street                                <dbl> 0, …
## $ Address_line_1_X27.Lower.Fore.Street                               <dbl> 0, …
## $ Address_line_1_X27.Lowther.Street                                  <dbl> 0, …
## $ Address_line_1_X28.Plough.Hill                                     <dbl> 0, …
## $ Address_line_1_X28.Scotts.Place                                    <dbl> 0, …
## $ Address_line_1_X29.French.Street                                   <dbl> 0, …
## $ Address_line_1_X3.Commercial.Street                                <dbl> 0, …
## $ Address_line_1_X3.East.Park.Road                                   <dbl> 0, …
## $ Address_line_1_X3.East.Street                                      <dbl> 0, …
## $ Address_line_1_X3.Main.Street                                      <dbl> 0, …
## $ Address_line_1_X3.Shore.Road                                       <dbl> 0, …
## $ Address_line_1_X3.West.Street                                      <dbl> 0, …
## $ Address_line_1_X31.Wood.Street                                     <dbl> 0, …
## $ Address_line_1_X320.Camp.Road                                      <dbl> 0, …
## $ Address_line_1_X33.The.Square                                      <dbl> 0, …
## $ Address_line_1_X346.High.Street                                    <dbl> 0, …
## $ Address_line_1_X35.The.Meadows                                     <dbl> 0, …
## $ Address_line_1_X35.Townfield.Gardens                               <dbl> 0, …
## $ Address_line_1_X36.High.Street                                     <dbl> 0, …
## $ Address_line_1_X368.Cregagh.Road                                   <dbl> 0, …
## $ Address_line_1_X39.Christchurch.Road                               <dbl> 0, …
## $ Address_line_1_X4.Buckland.Terrace                                 <dbl> 0, …
## $ Address_line_1_X40.Southwark.Street                                <dbl> 0, …
## $ Address_line_1_X41.Devonshire.Place                                <dbl> 0, …
## $ Address_line_1_X42.High.Street                                     <dbl> 0, …
## $ Address_line_1_X45.Market.Street                                   <dbl> 0, …
## $ Address_line_1_X46.Clwyd.Street                                    <dbl> 0, …
## $ Address_line_1_X46.Queen.Street                                    <dbl> 0, …
## $ Address_line_1_X46.50.Copperfield.Road                             <dbl> 0, …
## $ Address_line_1_X5.6.St..Matthews.Street                            <dbl> 0, …
## $ Address_line_1_X5.7.Mandeville.Place                               <dbl> 0, …
## $ Address_line_1_X501.Wherstead.Road                                 <dbl> 0, …
## $ Address_line_1_X51.Culver.Road                                     <dbl> 0, …
## $ Address_line_1_X52.Alfred.Street                                   <dbl> 0, …
## $ Address_line_1_X55.63.Hurst.Street                                 <dbl> 0, …
## $ Address_line_1_X57a.Newton.Street                                  <dbl> 0, …
## $ Address_line_1_X58a.High.Street                                    <dbl> 0, …
## $ Address_line_1_X6.Angel.Hill                                       <dbl> 0, …
## $ Address_line_1_X6.Chapel.Street                                    <dbl> 0, …
## $ Address_line_1_X6.Church.Close                                     <dbl> 0, …
## $ Address_line_1_X6.King.Street                                      <dbl> 0, …
## $ Address_line_1_X6.The.Square                                       <dbl> 0, …
## $ Address_line_1_X6.7.Queens.Terrace                                 <dbl> 0, …
## $ Address_line_1_X60.62.Nelson.Street                                <dbl> 0, …
## $ Address_line_1_X61.Brook.Street                                    <dbl> 0, …
## $ Address_line_1_X61.Chestergate                                     <dbl> 0, …
## $ Address_line_1_X62.64.Marketgate                                   <dbl> 0, …
## $ Address_line_1_X670.Warwick.Road                                   <dbl> 0, …
## $ Address_line_1_X69.High.St                                         <dbl> 0, …
## $ Address_line_1_X70.Fore.Street                                     <dbl> 0, …
## $ Address_line_1_X74.High.Street                                     <dbl> 0, …
## $ Address_line_1_X74.The.Headrow                                     <dbl> 0, …
## $ Address_line_1_X75.79.Vyse.Street                                  <dbl> 0, …
## $ Address_line_1_X78.High.Street                                     <dbl> 0, …
## $ Address_line_1_X7A.The.Scores                                      <dbl> 0, …
## $ Address_line_1_X8.Park.Street                                      <dbl> 0, …
## $ Address_line_1_X8.10.Queen.Street                                  <dbl> 0, …
## $ Address_line_1_X80b.The.Chase                                      <dbl> 0, …
## $ Address_line_1_X9.Broad.Street                                     <dbl> 0, …
## $ Address_line_1_X9.Holy.Bones                                       <dbl> 0, …
## $ Address_line_1_X90.Buckingham.Palace.Road                          <dbl> 0, …
## $ Address_line_1_X92.Beacons.Park                                    <dbl> 0, …
## $ Address_line_1_Abbey.Gate                                          <dbl> 0, …
## $ Address_line_1_Abbeymill.Business.Centre                           <dbl> 0, …
## $ Address_line_1_Abbot.Hall                                          <dbl> 0, …
## $ Address_line_1_Aberystwyth.Arts.Centre                             <dbl> 0, …
## $ Address_line_1_Abington.Park                                       <dbl> 0, …
## $ Address_line_1_AGC.Centre                                          <dbl> 0, …
## $ Address_line_1_All.Saints.Church                                   <dbl> 0, …
## $ Address_line_1_All.Saints.Square                                   <dbl> 0, …
## $ Address_line_1_Alumni.House                                        <dbl> 0, …
## $ Address_line_1_American.Express.Stadium                            <dbl> 0, …
## $ Address_line_1_Annay.Road                                          <dbl> 0, …
## $ Address_line_1_Anstruther.Harbour                                  <dbl> 0, …
## $ Address_line_1_Apartment.4                                         <dbl> 0, …
## $ Address_line_1_Appletongate                                        <dbl> 0, …
## $ Address_line_1_April.Cottage                                       <dbl> 0, …
## $ Address_line_1_Arbroath.Library                                    <dbl> 0, …
## $ Address_line_1_Arbuthnot.Museum                                    <dbl> 0, …
## $ Address_line_1_Arundel.Gate                                        <dbl> 0, …
## $ Address_line_1_Ash.Centre                                          <dbl> 0, …
## $ Address_line_1_Ashby.Road                                          <dbl> 0, …
## $ Address_line_1_Ashland                                             <dbl> 0, …
## $ Address_line_1_Ashton.Town.Hall                                    <dbl> 0, …
## $ Address_line_1_Ashworth.Barracks                                   <dbl> 0, …
## $ Address_line_1_Avenue.House                                        <dbl> 0, …
## $ Address_line_1_Aykley.Heads                                        <dbl> 0, …
## $ Address_line_1_Baldwins.Crescent                                   <dbl> 0, …
## $ Address_line_1_Ballbathie                                          <dbl> 0, …
## $ Address_line_1_Bamburgh.Castle                                     <dbl> 0, …
## $ Address_line_1_Bark.House.Lane                                     <dbl> 0, …
## $ Address_line_1_Barkers.Factory.Complex                             <dbl> 0, …
## $ Address_line_1_Barr.St                                             <dbl> 0, …
## $ Address_line_1_Barras.Street                                       <dbl> 0, …
## $ Address_line_1_Barry.Island.Station                                <dbl> 0, …
## $ Address_line_1_Barton.Lane                                         <dbl> 0, …
## $ Address_line_1_Barton.Meade.House                                  <dbl> 0, …
## $ Address_line_1_Bashley.Manor                                       <dbl> 0, …
## $ Address_line_1_Battle.Abbey                                        <dbl> 0, …
## $ Address_line_1_Beach.of.Houbie                                     <dbl> 0, …
## $ Address_line_1_Beacon.Street                                       <dbl> 0, …
## $ Address_line_1_Beckford.Road                                       <dbl> 0, …
## $ Address_line_1_Becks.Square                                        <dbl> 0, …
## $ Address_line_1_Bedale.Hall                                         <dbl> 0, …
## $ Address_line_1_Benenden.Hospital                                   <dbl> 0, …
## $ Address_line_1_Birkenhead.Town.Hall                                <dbl> 0, …
## $ Address_line_1_Blackridge.Library                                  <dbl> 0, …
## $ Address_line_1_Blacksmiths.Shop.Centre                             <dbl> 0, …
## $ Address_line_1_Blakesley.Road                                      <dbl> 0, …
## $ Address_line_1_Blenheim.Palace.Sawmills                            <dbl> 0, …
## $ Address_line_1_Block.H                                             <dbl> 0, …
## $ Address_line_1_Bod.of.Gremista                                     <dbl> 0, …
## $ Address_line_1_Borough.Road                                        <dbl> 0, …
## $ Address_line_1_Boyle.Street                                        <dbl> 0, …
## $ Address_line_1_Brampton.Road                                       <dbl> 0, …
## $ Address_line_1_Bridge.Street                                       <dbl> 0, …
## $ Address_line_1_Broad.Street                                        <dbl> 0, …
## $ Address_line_1_Broadclyst                                          <dbl> 0, …
## $ Address_line_1_Brock.Barracks                                      <dbl> 0, …
## $ Address_line_1_Bronte.Parsonage.Museum                             <dbl> 0, …
## $ Address_line_1_Brookend.Street                                     <dbl> 0, …
## $ Address_line_1_Broomfield                                          <dbl> 0, …
## $ Address_line_1_Bruce.Embankment                                    <dbl> 0, …
## $ Address_line_1_Buile.Hill.Park                                     <dbl> 0, …
## $ Address_line_1_Burnby.Hall.Gardens                                 <dbl> 0, …
## $ Address_line_1_Burslem.Town.Hall                                   <dbl> 0, …
## $ Address_line_1_Bushmead.Community.Centre                           <dbl> 0, …
## $ Address_line_1_Butchery.Lane                                       <dbl> 0, …
## $ Address_line_1_Butterly.Station                                    <dbl> 0, …
## $ Address_line_1_Byland                                              <dbl> 0, …
## $ Address_line_1_c.o.7.Hospital.Lane                                 <dbl> 0, …
## $ Address_line_1_c.o.Chambers.Institute                              <dbl> 0, …
## $ Address_line_1_c.o.Spennymoor.Town.Hall                            <dbl> 0, …
## $ Address_line_1_Callendar.Park                                      <dbl> 0, …
## $ Address_line_1_Camp.Mill                                           <dbl> 0, …
## $ Address_line_1_Canolfan.Iaith.Clwyd                                <dbl> 0, …
## $ Address_line_1_Canolfan.Thomas.Telford                             <dbl> 0, …
## $ Address_line_1_Canterbury.Christ.Church.University                 <dbl> 0, …
## $ Address_line_1_Cark.in.Cartmel                                     <dbl> 0, …
## $ Address_line_1_Castle.Approach                                     <dbl> 0, …
## $ Address_line_1_Castle.Campbell.Hall                                <dbl> 0, …
## $ Address_line_1_Castle.House                                        <dbl> 0, …
## $ Address_line_1_Castle.Lane                                         <dbl> 0, …
## $ Address_line_1_Castle.Street                                       <dbl> 0, …
## $ Address_line_1_Castle.Wynd                                         <dbl> 0, …
## $ Address_line_1_Castleford.Library                                  <dbl> 0, …
## $ Address_line_1_Causeway.House                                      <dbl> 0, …
## $ Address_line_1_Caverswall.Road.Station                             <dbl> 0, …
## $ Address_line_1_Central.Baptist.Church                              <dbl> 0, …
## $ Address_line_1_Central.House                                       <dbl> 0, …
## $ Address_line_1_Central.Museum                                      <dbl> 0, …
## $ Address_line_1_Chapel.Hill                                         <dbl> 0, …
## $ Address_line_1_Chapel.Lane                                         <dbl> 0, …
## $ Address_line_1_Charing.Cross                                       <dbl> 0, …
## $ Address_line_1_Charleston                                          <dbl> 0, …
## $ Address_line_1_Cheney.School                                       <dbl> 0, …
## $ Address_line_1_Chesters.Roman.Fort                                 <dbl> 0, …
## $ Address_line_1_Christ.Church                                       <dbl> 0, …
## $ Address_line_1_Christchurch.Park                                   <dbl> 0, …
## $ Address_line_1_Church.Bay                                          <dbl> 0, …
## $ Address_line_1_Church.Chare                                        <dbl> 0, …
## $ Address_line_1_Church.Hill                                         <dbl> 0, …
## $ Address_line_1_Church.Lane                                         <dbl> 0, …
## $ Address_line_1_Church.Point                                        <dbl> 0, …
## $ Address_line_1_Church.Road                                         <dbl> 0, …
## $ Address_line_1_Church.Square                                       <dbl> 0, …
## $ Address_line_1_Church.Street                                       <dbl> 0, …
## $ Address_line_1_Churchgate                                          <dbl> 0, …
## $ Address_line_1_City.Hall                                           <dbl> 0, …
## $ Address_line_1_Civic.Centre                                        <dbl> 0, …
## $ Address_line_1_Claverton.Manor                                     <dbl> 0, …
## $ Address_line_1_Clay.Lake                                           <dbl> 0, …
## $ Address_line_1_Clifton.Dykes                                       <dbl> 0, …
## $ Address_line_1_Clifton.Lane                                        <dbl> 0, …
## $ Address_line_1_Clive.Steps                                         <dbl> 0, …
## $ Address_line_1_Clynnog.Fawr                                        <dbl> 0, …
## $ Address_line_1_Coalisland.Enterprise.Centre                        <dbl> 0, …
## $ Address_line_1_College.Hill                                        <dbl> 0, …
## $ Address_line_1_Conifer.Cottage                                     <dbl> 0, …
## $ Address_line_1_Corrigall                                           <dbl> 0, …
## $ Address_line_1_Cotehele.Quay                                       <dbl> 0, …
## $ Address_line_1_Cottage.Lane                                        <dbl> 0, …
## $ Address_line_1_County.Park                                         <dbl> 0, …
## $ Address_line_1_County.Sessions.House                               <dbl> 0, …
## $ Address_line_1_Court.7.Rear.of.Shakespeare.Public.House            <dbl> 0, …
## $ Address_line_1_Cranmore.Railway.Station                            <dbl> 0, …
## $ Address_line_1_Crich                                               <dbl> 0, …
## $ Address_line_1_Cromwell.Road                                       <dbl> 0, …
## $ Address_line_1_Cross.Street                                        <dbl> 0, …
## $ Address_line_1_Cunard.Building                                     <dbl> 0, …
## $ Address_line_1_Custom.House                                        <dbl> 0, …
## $ Address_line_1_Cusworth.Hall                                       <dbl> 0, …
## $ Address_line_1_Cypress.Road                                        <dbl> 0, …
## $ Address_line_1_Dalkeith.Hall                                       <dbl> 0, …
## $ Address_line_1_Darby.Road                                          <dbl> 0, …
## $ Address_line_1_Daws.House                                          <dbl> 0, …
## $ Address_line_1_Defence.Intelligence...Security.Centre              <dbl> 0, …
## $ Address_line_1_Dent                                                <dbl> 0, …
## $ Address_line_1_Department.of.Earth.Sciences                        <dbl> 0, …
## $ Address_line_1_Department.of.Plant.Sciences                        <dbl> 0, …
## $ Address_line_1_Depot.Road                                          <dbl> 0, …
## $ Address_line_1_Derwent.Valley.Visitor.Centre                       <dbl> 0, …
## $ Address_line_1_Derwent.Works                                       <dbl> 0, …
## $ Address_line_1_Devonshire.Park                                     <dbl> 0, …
## $ Address_line_1_Dick.Institute                                      <dbl> 0, …
## $ Address_line_1_Dinosaur.Farm                                       <dbl> 1, …
## $ Address_line_1_Dinting.Lane                                        <dbl> 0, …
## $ Address_line_1_Discovery.Centre                                    <dbl> 0, …
## $ Address_line_1_Discovery.Point                                     <dbl> 0, …
## $ Address_line_1_Discovery.Road                                      <dbl> 0, …
## $ Address_line_1_Dogdyke.Road                                        <dbl> 0, …
## $ Address_line_1_Doncaster.Museum...Art.Gallery                      <dbl> 0, …
## $ Address_line_1_Dornoch.Heritage.Society.Trust                      <dbl> 0, …
## $ Address_line_1_Dre.Fach.Felindre                                   <dbl> 0, …
## $ Address_line_1_Dundee.Road                                         <dbl> 0, …
## $ Address_line_1_Dunrobin.Castle                                     <dbl> 0, …
## $ Address_line_1_Dunster                                             <dbl> 0, …
## $ Address_line_1_Duxford.Airfield                                    <dbl> 0, …
## $ Address_line_1_Dyrham.Park                                         <dbl> 0, …
## $ Address_line_1_Earlswood.Village.Hall                              <dbl> 0, …
## $ Address_line_1_East.Cottages                                       <dbl> 0, …
## $ Address_line_1_East.Princes.Street.Gardens                         <dbl> 0, …
## $ Address_line_1_Easterbook                                          <dbl> 0, …
## $ Address_line_1_Eden.Valley.Museum                                  <dbl> 0, …
## $ Address_line_1_Edwards.Building                                    <dbl> 0, …
## $ Address_line_1_Englesea.Brook.Lane                                 <dbl> 0, …
## $ Address_line_1_Evelyn.Woods.Road                                   <dbl> 0, …
## $ Address_line_1_Exchange.Street                                     <dbl> 0, …
## $ Address_line_1_Exhibition.Square                                   <dbl> 0, …
## $ Address_line_1_Falcon.Centre                                       <dbl> 0, …
## $ Address_line_1_Fall.Barn.Road                                      <dbl> 0, …
## $ Address_line_1_Field.Farm                                          <dbl> 0, …
## $ Address_line_1_Filkins                                             <dbl> 0, …
## $ Address_line_1_Fleece.Yard                                         <dbl> 0, …
## $ Address_line_1_Fleet.Street                                        <dbl> 0, …
## $ Address_line_1_Fochabers.Folk.Museum                               <dbl> 0, …
## $ Address_line_1_Ford.End.Farm                                       <dbl> 0, …
## $ Address_line_1_Fore.Street                                         <dbl> 0, …
## $ Address_line_1_Fort.Amherst                                        <dbl> 0, …
## $ Address_line_1_Foundry.Street                                      <dbl> 0, …
## $ Address_line_1_Framework.Knitters.Cottages                         <dbl> 0, …
## $ Address_line_1_Francis.Street                                      <dbl> 0, …
## $ Address_line_1_Fraser.Noble.Building                               <dbl> 0, …
## $ Address_line_1_Freemasons.Hall                                     <dbl> 0, …
## $ Address_line_1_Frogmore.Paper.Mill                                 <dbl> 0, …
## $ Address_line_1_Furnace.Lane                                        <dbl> 0, …
## $ Address_line_1_Galleries.and.Museums.Department                    <dbl> 0, …
## $ Address_line_1_Gallery.Walk                                        <dbl> 0, …
## $ Address_line_1_Gasworks.Road                                       <dbl> 0, …
## $ Address_line_1_George.Edward.Hall                                  <dbl> 0, …
## $ Address_line_1_Gilfach.Ddu                                         <dbl> 0, …
## $ Address_line_1_Glamorgan.Cricket                                   <dbl> 0, …
## $ Address_line_1_Globe.House                                         <dbl> 0, …
## $ Address_line_1_Gloucester.Court.Mews                               <dbl> 0, …
## $ Address_line_1_Godinton.House                                      <dbl> 0, …
## $ Address_line_1_Gold.Street                                         <dbl> 0, …
## $ Address_line_1_Gomshall.Lane                                       <dbl> 0, …
## $ Address_line_1_Gossage.Building                                    <dbl> 0, …
## $ Address_line_1_Grammar.School.Walk                                 <dbl> 0, …
## $ Address_line_1_Great.Amwell                                        <dbl> 0, …
## $ Address_line_1_Great.Hall.Complex                                  <dbl> 0, …
## $ Address_line_1_Great.Pulteney.Street                               <dbl> 0, …
## $ Address_line_1_Great.Russell.Street                                <dbl> 0, …
## $ Address_line_1_Greaves.Street                                      <dbl> 0, …
## $ Address_line_1_Green.End                                           <dbl> 0, …
## $ Address_line_1_Gremista                                            <dbl> 0, …
## $ Address_line_1_Ground.Floor                                        <dbl> 0, …
## $ Address_line_1_Grove.Road                                          <dbl> 0, …
## $ Address_line_1_Guildford.Lawn                                      <dbl> 0, …
## $ Address_line_1_Guildhall                                           <dbl> 0, …
## $ Address_line_1_Guildhall.Library                                   <dbl> 0, …
## $ Address_line_1_Gun.Garden                                          <dbl> 0, …
## $ Address_line_1_Gunby                                               <dbl> 0, …
## $ Address_line_1_Gunners.Way                                         <dbl> 0, …
## $ Address_line_1_Gwy.House                                           <dbl> 0, …
## $ Address_line_1_Habergham.Dr                                        <dbl> 0, …
## $ Address_line_1_Hackmans.Lane                                       <dbl> 0, …
## $ Address_line_1_Halifax.Road                                        <dbl> 0, …
## $ Address_line_1_Halifax.Way                                         <dbl> 0, …
## $ Address_line_1_Hall.Lane                                           <dbl> 0, …
## $ Address_line_1_Hallam.Road                                         <dbl> 0, …
## $ Address_line_1_Hambledon                                           <dbl> 0, …
## $ Address_line_1_Hamilton.Street                                     <dbl> 0, …
## $ Address_line_1_Hampton.Cottage                                     <dbl> 0, …
## $ Address_line_1_Harbour.Road                                        <dbl> 0, …
## $ Address_line_1_Harbour.Square                                      <dbl> 0, …
## $ Address_line_1_Harlow.Carr.Botanical.Gardens                       <dbl> 0, …
## $ Address_line_1_Harveys.Lane                                        <dbl> 0, …
## $ Address_line_1_Hays.Way                                            <dbl> 0, …
## $ Address_line_1_Headquarters                                        <dbl> 0, …
## $ Address_line_1_Heatherslaw.Mill                                    <dbl> 0, …
## $ Address_line_1_Heaton.Park                                         <dbl> 0, …
## $ Address_line_1_Heron.House                                         <dbl> 0, …
## $ Address_line_1_High.Street                                         <dbl> 0, …
## $ Address_line_1_High.Town                                           <dbl> 0, …
## $ Address_line_1_Higher.Stennack                                     <dbl> 0, …
## $ Address_line_1_Hill.Street                                         <dbl> 0, …
## $ Address_line_1_Hill.Top                                            <dbl> 0, …
## $ Address_line_1_Holcombe.Road                                       <dbl> 0, …
## $ Address_line_1_Holt.Road                                           <dbl> 0, …
## $ Address_line_1_Hughenden.Manor                                     <dbl> 0, …
## $ Address_line_1_Hull.Road                                           <dbl> 0, …
## $ Address_line_1_Hunsbury.Hill.Road                                  <dbl> 0, …
## $ Address_line_1_Huntercombe                                         <dbl> 0, …
## $ Address_line_1_Hyde.Drive                                          <dbl> 0, …
## $ Address_line_1_Imperial.War.Museum                                 <dbl> 0, …
## $ Address_line_1_Isle.Martin                                         <dbl> 0, …
## $ Address_line_1_Ivychurch.Road                                      <dbl> 0, …
## $ Address_line_1_John.Paul.Jones.Cottage                             <dbl> 0, …
## $ Address_line_1_Kelham.Island.Museum                                <dbl> 0, …
## $ Address_line_1_Kensington.Gardens                                  <dbl> 0, …
## $ Address_line_1_Kent.Street                                         <dbl> 0, …
## $ Address_line_1_Keogh.Barracks                                      <dbl> 0, …
## $ Address_line_1_Kew.Gardens                                         <dbl> 0, …
## $ Address_line_1_King.Street                                         <dbl> 0, …
## $ Address_line_1_Kings.Yard                                          <dbl> 0, …
## $ Address_line_1_Kingsway                                            <dbl> 0, …
## $ Address_line_1_Kiplin                                              <dbl> 0, …
## $ Address_line_1_Kirkleatham                                         <dbl> 0, …
## $ Address_line_1_Kirkwynd                                            <dbl> 0, …
## $ Address_line_1_Kneller.Hall                                        <dbl> 0, …
## $ Address_line_1_Laidhay                                             <dbl> 0, …
## $ Address_line_1_Laird.Forge.Buildings                               <dbl> 0, …
## $ Address_line_1_Lamberhurst                                         <dbl> 0, …
## $ Address_line_1_Lambeth.Palace.Road                                 <dbl> 0, …
## $ Address_line_1_Lancaster.University                                <dbl> 0, …
## $ Address_line_1_Lanehead                                            <dbl> 0, …
## $ Address_line_1_Lavender.Pond.and.Nature.Park                       <dbl> 0, …
## $ Address_line_1_Lawn.Road                                           <dbl> 0, …
## $ Address_line_1_Left.Bank                                           <dbl> 0, …
## $ Address_line_1_Leigh.Town.Hall                                     <dbl> 0, …
## $ Address_line_1_Lighthouse.Road                                     <dbl> 0, …
## $ Address_line_1_Lineside                                            <dbl> 0, …
## $ Address_line_1_Linlithgow.Canal.Centre                             <dbl> 0, …
## $ Address_line_1_Liskeard.Road                                       <dbl> 0, …
## $ Address_line_1_Little.Heath                                        <dbl> 0, …
## $ Address_line_1_Livingston.Mill                                     <dbl> 0, …
## $ Address_line_1_Lizard.Point                                        <dbl> 0, …
## $ Address_line_1_Llandwrog                                           <dbl> 0, …
## $ Address_line_1_Llanfrecha.Way                                      <dbl> 0, …
## $ Address_line_1_Llanthony.Warehouse                                 <dbl> 0, …
## $ Address_line_1_Lock.Lane                                           <dbl> 0, …
## $ Address_line_1_Loco.Works.Road                                     <dbl> 0, …
## $ Address_line_1_Lodge.Hill.Lane                                     <dbl> 0, …
## $ Address_line_1_Lord.Street                                         <dbl> 0, …
## $ Address_line_1_Lough.Neath.Discovery.Centre                        <dbl> 0, …
## $ Address_line_1_Low.Lighthouse                                      <dbl> 0, …
## $ Address_line_1_Lower.High.Street                                   <dbl> 0, …
## $ Address_line_1_Lower.Hillhead                                      <dbl> 0, …
## $ Address_line_1_Lower.Road                                          <dbl> 0, …
## $ Address_line_1_Lyness                                              <dbl> 0, …
## $ Address_line_1_Main.Road                                           <dbl> 0, …
## $ Address_line_1_Main.Street                                         <dbl> 0, …
## $ Address_line_1_Marischal.College                                   <dbl> 0, …
## $ Address_line_1_Market.Place                                        <dbl> 0, …
## $ Address_line_1_Market.Square                                       <dbl> 0, …
## $ Address_line_1_Market.Street                                       <dbl> 0, …
## $ Address_line_1_Markfield.Road                                      <dbl> 0, …
## $ Address_line_1_Marlborough.Road                                    <dbl> 0, …
## $ Address_line_1_Marshall.Place                                      <dbl> 0, …
## $ Address_line_1_Martello.Tower                                      <dbl> 0, …
## $ Address_line_1_Masonic.Hall                                        <dbl> 0, …
## $ Address_line_1_McConnell.Drive                                     <dbl> 0, …
## $ Address_line_1_Mercer.Park                                         <dbl> 0, …
## $ Address_line_1_Micklegate                                          <dbl> 0, …
## $ Address_line_1_Middleton.Hall                                      <dbl> 0, …
## $ Address_line_1_Mileham.Road                                        <dbl> 0, …
## $ Address_line_1_Mill.Lane                                           <dbl> 0, …
## $ Address_line_1_Mill.Street                                         <dbl> 0, …
## $ Address_line_1_Milton                                              <dbl> 0, …
## $ Address_line_1_Minster.Road                                        <dbl> 0, …
## $ Address_line_1_Moat.Park                                           <dbl> 0, …
## $ Address_line_1_MoDA.Collections.Centre                             <dbl> 0, …
## $ Address_line_1_Montacute                                           <dbl> 0, …
## $ Address_line_1_Montague.House                                      <dbl> 0, …
## $ Address_line_1_Monument.Cl                                         <dbl> 0, …
## $ Address_line_1_Moorside.Road                                       <dbl> 0, …
## $ Address_line_1_Morgans.Lane                                        <dbl> 0, …
## $ Address_line_1_Moss.Street                                         <dbl> 0, …
## $ Address_line_1_Mottisfont                                          <dbl> 0, …
## $ Address_line_1_Moyard.House                                        <dbl> 0, …
## $ Address_line_1_Muggeridge.Farm                                     <dbl> 0, …
## $ Address_line_1_Muller.House                                        <dbl> 0, …
## $ Address_line_1_Museum.and.Arts.Centre                              <dbl> 0, …
## $ Address_line_1_Museum.and.Study.Collection                         <dbl> 0, …
## $ Address_line_1_Museum.of.South.Somerset                            <dbl> 0, …
## $ Address_line_1_Museum.Square                                       <dbl> 0, …
## $ Address_line_1_Museum.Terrace                                      <dbl> 0, …
## $ Address_line_1_Ninewells.Hospital.and.Medical.School               <dbl> 0, …
## $ Address_line_1_No..2.Office.Block                                  <dbl> 0, …
## $ Address_line_1_North.Frodingham.Cemetery                           <dbl> 0, …
## $ Address_line_1_North.Norfolk.Railway                               <dbl> 0, …
## $ Address_line_1_North.Road                                          <dbl> 0, …
## $ Address_line_1_North.Street                                        <dbl> 0, …
## $ Address_line_1_Norwich.Heritage...Regeneration.Trust               <dbl> 0, …
## $ Address_line_1_Oaklands.Park                                       <dbl> 0, …
## $ Address_line_1_Oakley.Wharf.No.1                                   <dbl> 0, …
## $ Address_line_1_Old.Blue.Mill                                       <dbl> 0, …
## $ Address_line_1_Old.Congregational.Chapel                           <dbl> 0, …
## $ Address_line_1_Old.GVT.Engine.Shed                                 <dbl> 0, …
## $ Address_line_1_Old.Mills.Road                                      <dbl> 0, …
## $ Address_line_1_Old.Parish.Church                                   <dbl> 0, …
## $ Address_line_1_Old.School                                          <dbl> 0, …
## $ Address_line_1_Old.Station.Yard                                    <dbl> 0, …
## $ Address_line_1_Old.Town                                            <dbl> 0, …
## $ Address_line_1_Old.Tunbridge.Electricity.Generating.Station        <dbl> 0, …
## $ Address_line_1_Oundle.Road                                         <dbl> 0, …
## $ Address_line_1_Palace.Avenue.Arcade                                <dbl> 0, …
## $ Address_line_1_Palacerigg.Country.Park                             <dbl> 0, …
## $ Address_line_1_Parade.Street                                       <dbl> 0, …
## $ Address_line_1_Park.Walk                                           <dbl> 0, …
## $ Address_line_1_Parkinson.Building                                  <dbl> 0, …
## $ Address_line_1_Paternoster.Row                                     <dbl> 0, …
## $ Address_line_1_Paynes.Park                                         <dbl> 0, …
## $ Address_line_1_Peel.Park                                           <dbl> 0, …
## $ Address_line_1_Perth.Road                                          <dbl> 0, …
## $ Address_line_1_Philipshill.Road                                    <dbl> 0, …
## $ Address_line_1_Pickering.Road                                      <dbl> 0, …
## $ Address_line_1_Pickering.Station                                   <dbl> 0, …
## $ Address_line_1_Pier.Road                                           <dbl> 0, …
## $ Address_line_1_Plas.Newydd                                         <dbl> 0, …
## $ Address_line_1_Polesden.Lacey                                      <dbl> 0, …
## $ Address_line_1_Pool                                                <dbl> 0, …
## $ Address_line_1_Port.Sunlight.Village                               <dbl> 0, …
## $ Address_line_1_Prescott.Hill                                       <dbl> 0, …
## $ Address_line_1_Preston.Park                                        <dbl> 0, …
## $ Address_line_1_Prestongrange                                       <dbl> 0, …
## $ Address_line_1_Priory.Lodge                                        <dbl> 0, …
## $ Address_line_1_Priory.Row                                          <dbl> 0, …
## $ Address_line_1_Purfleet.Quay                                       <dbl> 0, …
## $ Address_line_1_Queen.Street                                        <dbl> 0, …
## $ Address_line_1_Rail.Yard                                           <dbl> 0, …
## $ Address_line_1_Ravenor.Farm                                        <dbl> 0, …
## $ Address_line_1_Ravenshead                                          <dbl> 0, …
## $ Address_line_1_Ravenspoint                                         <dbl> 0, …
## $ Address_line_1_Rayrigg.Road                                        <dbl> 0, …
## $ Address_line_1_Rectory.Place                                       <dbl> 0, …
## $ Address_line_1_Regional.Resource.Centre                            <dbl> 0, …
## $ Address_line_1_Repton.School                                       <dbl> 0, …
## $ Address_line_1_Riccarton                                           <dbl> 0, …
## $ Address_line_1_Rishworth.Road                                      <dbl> 0, …
## $ Address_line_1_River.Court                                         <dbl> 0, …
## $ Address_line_1_Riverside                                           <dbl> 0, …
## $ Address_line_1_Riverside.Rd                                        <dbl> 0, …
## $ Address_line_1_Riversley.Park                                      <dbl> 0, …
## $ Address_line_1_Robinsons.School                                    <dbl> 0, …
## $ Address_line_1_Roderick.Bowen.Library                              <dbl> 0, …
## $ Address_line_1_Rowleys.House.Museum                                <dbl> 0, …
## $ Address_line_1_Royal.Academy.of.Music                              <dbl> 0, …
## $ Address_line_1_Royal.Hospital.Road                                 <dbl> 0, …
## $ Address_line_1_Royal.Marines.Museum                                <dbl> 0, …
## $ Address_line_1_Royal.Oak.Farm                                      <dbl> 0, …
## $ Address_line_1_Royal.Victoria.Arcade                               <dbl> 0, …
## $ Address_line_1_Salisbury.Hall                                      <dbl> 0, …
## $ Address_line_1_Sally.Lunns.House                                   <dbl> 0, …
## $ Address_line_1_Sandown.Airport                                     <dbl> 0, …
## $ Address_line_1_Sandown.Barrack.Battery                             <dbl> 0, …
## $ Address_line_1_Scalasaig.Harbour                                   <dbl> 0, …
## $ Address_line_1_School.House                                        <dbl> 0, …
## $ Address_line_1_School.Road                                         <dbl> 0, …
## $ Address_line_1_Schoolhouse                                         <dbl> 0, …
## $ Address_line_1_Sevenoaks.Library                                   <dbl> 0, …
## $ Address_line_1_Shapwick.Road                                       <dbl> 0, …
## $ Address_line_1_Shenstone.Drive                                     <dbl> 0, …
## $ Address_line_1_Sheringham.Station                                  <dbl> 0, …
## $ Address_line_1_Shibden.Hall                                        <dbl> 0, …
## $ Address_line_1_Shugborough.Estate                                  <dbl> 0, …
## $ Address_line_1_Signal.Tower                                        <dbl> 0, …
## $ Address_line_1_Singleton                                           <dbl> 0, …
## $ Address_line_1_Sir.John.Barrow.Cottage                             <dbl> 0, …
## $ Address_line_1_Sir.Kenneth.Green.Library                           <dbl> 0, …
## $ Address_line_1_Slatequarry.Road                                    <dbl> 0, …
## $ Address_line_1_Sloe.Lane                                           <dbl> 0, …
## $ Address_line_1_Smallhythe                                          <dbl> 0, …
## $ Address_line_1_Soho.Cottages                                       <dbl> 0, …
## $ Address_line_1_Somerset.House                                      <dbl> 0, …
## $ Address_line_1_South.Shore                                         <dbl> 0, …
## $ Address_line_1_Southey.Works                                       <dbl> 0, …
## $ Address_line_1_Sparrows.Nest.Gardens                               <dbl> 0, …
## $ Address_line_1_Spicehall.Park.Road                                 <dbl> 0, …
## $ Address_line_1_Springtown.Road                                     <dbl> 0, …
## $ Address_line_1_St.Ayles                                            <dbl> 0, …
## $ Address_line_1_St.Fagans                                           <dbl> 0, …
## $ Address_line_1_St.Marys.Gate                                       <dbl> 0, …
## $ Address_line_1_St.Mathews.Street                                   <dbl> 0, …
## $ Address_line_1_St.Nicholas.Circle                                  <dbl> 0, …
## $ Address_line_1_St.Peters.Street                                    <dbl> 0, …
## $ Address_line_1_St.Wilfreds.Chapel                                  <dbl> 0, …
## $ Address_line_1_St..Keyne.Station                                   <dbl> 0, …
## $ Address_line_1_St..Marys.Hospital                                  <dbl> 0, …
## $ Address_line_1_Stanley.Street                                      <dbl> 0, …
## $ Address_line_1_Station.Approach                                    <dbl> 0, …
## $ Address_line_1_Station.Road                                        <dbl> 0, …
## $ Address_line_1_Station.Yard                                        <dbl> 0, …
## $ Address_line_1_Steeple.Lane                                        <dbl> 0, …
## $ Address_line_1_Stock.Woods                                         <dbl> 0, …
## $ Address_line_1_Street.House                                        <dbl> 0, …
## $ Address_line_1_Summer.Lane                                         <dbl> 0, …
## $ Address_line_1_Surrey.Fire.and.Rescue.Service.Headquarters         <dbl> 0, …
## $ Address_line_1_Surrey.Street                                       <dbl> 0, …
## $ Address_line_1_TA.Centre                                           <dbl> 0, …
## $ Address_line_1_Tameside.Central.Library...Art.Gallery              <dbl> 0, …
## $ Address_line_1_Tannaghmore.Gardens                                 <dbl> 0, …
## $ Address_line_1_Tarbatness.Road                                     <dbl> 0, …
## $ Address_line_1_Tatton.Park                                         <dbl> 0, …
## $ Address_line_1_Temple.Newsam.Estate                                <dbl> 0, …
## $ Address_line_1_Territorial.Army.Centre                             <dbl> 0, …
## $ Address_line_1_Thames.Valley.Police.Training.Centre                <dbl> 0, …
## $ Address_line_1_Thameside.Complex                                   <dbl> 0, …
## $ Address_line_1_The.Agricultural.Museum                             <dbl> 0, …
## $ Address_line_1_The.Arched.House                                    <dbl> 0, …
## $ Address_line_1_The.Ark                                             <dbl> 0, …
## $ Address_line_1_The.Barge.MV.Confiance                              <dbl> 0, …
## $ Address_line_1_The.Broadway                                        <dbl> 0, …
## $ Address_line_1_The.Butts                                           <dbl> 0, …
## $ Address_line_1_The.Castle                                          <dbl> 0, …
## $ Address_line_1_The.Castleton.Centre                                <dbl> 0, …
## $ Address_line_1_The.Cathedral                                       <dbl> 0, …
## $ Address_line_1_The.Clease                                          <dbl> 0, …
## $ Address_line_1_The.Coliseum                                        <dbl> 0, …
## $ Address_line_1_The.Discovery.Centre..Aberdeenshire.Museums.Service <dbl> 0, …
## $ Address_line_1_The.Former.Guardroom..Building.40                   <dbl> 0, …
## $ Address_line_1_The.Friary                                          <dbl> 0, …
## $ Address_line_1_The.Grain.Store                                     <dbl> 0, …
## $ Address_line_1_The.Harbour                                         <dbl> 0, …
## $ Address_line_1_The.Headrow                                         <dbl> 0, …
## $ Address_line_1_The.Heritage.Centre                                 <dbl> 0, …
## $ Address_line_1_The.Institute                                       <dbl> 0, …
## $ Address_line_1_The.Keep                                            <dbl> 0, …
## $ Address_line_1_The.Langdon.Down.Centre                             <dbl> 0, …
## $ Address_line_1_The.Malt.Barn                                       <dbl> 0, …
## $ Address_line_1_The.Mansion                                         <dbl> 0, …
## $ Address_line_1_The.Market.House                                    <dbl> 0, …
## $ Address_line_1_The.Mens.Institute                                  <dbl> 0, …
## $ Address_line_1_The.Mint                                            <dbl> 0, …
## $ Address_line_1_The.Newarke                                         <dbl> 0, …
## $ Address_line_1_The.Octagon                                         <dbl> 0, …
## $ Address_line_1_The.Old.Fire.Station                                <dbl> 0, …
## $ Address_line_1_The.Old.Foundry                                     <dbl> 0, …
## $ Address_line_1_The.Old.Grammar.School                              <dbl> 0, …
## $ Address_line_1_The.Old.Guildhall                                   <dbl> 0, …
## $ Address_line_1_The.Old.Library                                     <dbl> 0, …
## $ Address_line_1_The.Old.Parsonage                                   <dbl> 0, …
## $ Address_line_1_The.Old.Police.Station                              <dbl> 0, …
## $ Address_line_1_The.Old.Pottery                                     <dbl> 0, …
## $ Address_line_1_The.Old.Railway.Station                             <dbl> 0, …
## $ Address_line_1_The.Old.School.House                                <dbl> 0, …
## $ Address_line_1_The.Old.Schoolmasters.House                         <dbl> 0, …
## $ Address_line_1_The.Old.Station                                     <dbl> 0, …
## $ Address_line_1_The.Old.Workhouse                                   <dbl> 0, …
## $ Address_line_1_The.Phoenix.Centre                                  <dbl> 0, …
## $ Address_line_1_The.Prebendal.Manor.House                           <dbl> 0, …
## $ Address_line_1_The.Precinct                                        <dbl> 0, …
## $ Address_line_1_The.Priory                                          <dbl> 0, …
## $ Address_line_1_The.Quays                                           <dbl> 0, …
## $ Address_line_1_The.Railway.Station                                 <dbl> 0, …
## $ Address_line_1_The.Redoubt.Fortress                                <dbl> 0, …
## $ Address_line_1_The.Royal.Chantry                                   <dbl> 0, …
## $ Address_line_1_The.Scottish.Infantry.Depot                         <dbl> 0, …
## $ Address_line_1_The.Shallows                                        <dbl> 0, …
## $ Address_line_1_The.Square                                          <dbl> 0, …
## $ Address_line_1_The.Stones                                          <dbl> 0, …
## $ Address_line_1_The.Street                                          <dbl> 0, …
## $ Address_line_1_The.Tolmen.Centre                                   <dbl> 0, …
## $ Address_line_1_The.Town.Hall                                       <dbl> 0, …
## $ Address_line_1_The.Town.House                                      <dbl> 0, …
## $ Address_line_1_The.Tree                                            <dbl> 0, …
## $ Address_line_1_The.Tribunal                                        <dbl> 0, …
## $ Address_line_1_The.Undercroft                                      <dbl> 0, …
## $ Address_line_1_The.University                                      <dbl> 0, …
## $ Address_line_1_The.University.of.Birmingham                        <dbl> 0, …
## $ Address_line_1_The.University.of.Newcastle                         <dbl> 0, …
## $ Address_line_1_The.Valley                                          <dbl> 0, …
## $ Address_line_1_The.Warren                                          <dbl> 0, …
## $ Address_line_1_The.Wharf                                           <dbl> 0, …
## $ Address_line_1_The.Yard                                            <dbl> 0, …
## $ Address_line_1_Theatre.Yard                                        <dbl> 0, …
## $ Address_line_1_Thoresby.Courtyard                                  <dbl> 0, …
## $ Address_line_1_Throwleigh                                          <dbl> 0, …
## $ Address_line_1_Tide.Mill.Way                                       <dbl> 0, …
## $ Address_line_1_Tolgus.Mount                                        <dbl> 0, …
## $ Address_line_1_Tolhouse.Street                                     <dbl> 0, …
## $ Address_line_1_Tollcross.Park                                      <dbl> 0, …
## $ Address_line_1_Town.Hall                                           <dbl> 0, …
## $ Address_line_1_Town.Hall.Complex                                   <dbl> 0, …
## $ Address_line_1_Town.House.West                                     <dbl> 0, …
## $ Address_line_1_Town.Street                                         <dbl> 0, …
## $ Address_line_1_Tredegar.House                                      <dbl> 0, …
## $ Address_line_1_Tresco.Estate                                       <dbl> 0, …
## $ Address_line_1_Trinity.Road                                        <dbl> 0, …
## $ Address_line_1_Trinity.Street                                      <dbl> 0, …
## $ Address_line_1_Trumpington.Street                                  <dbl> 0, …
## $ Address_line_1_Tudor.Barn                                          <dbl> 0, …
## $ Address_line_1_Tudor.House                                         <dbl> 0, …
## $ Address_line_1_Tudor.Road                                          <dbl> 0, …
## $ Address_line_1_Twelvetrees.Crescent                                <dbl> 0, …
## $ Address_line_1_Unit.5                                              <dbl> 0, …
## $ Address_line_1_Units.1                                             <dbl> 0, …
## $ Address_line_1_University.Archives.and.Special.Collections.Centre  <dbl> 0, …
## $ Address_line_1_University.College.London                           <dbl> 0, …
## $ Address_line_1_University.of.East.Anglia                           <dbl> 0, …
## $ Address_line_1_University.of.Manchester                            <dbl> 0, …
## $ Address_line_1_University.of.Warwick                               <dbl> 0, …
## $ Address_line_1_Upper.Hall                                          <dbl> 0, …
## $ Address_line_1_Urbis.Building                                      <dbl> 0, …
## $ Address_line_1_Valley.Road                                         <dbl> 0, …
## $ Address_line_1_Verdant.Works                                       <dbl> 0, …
## $ Address_line_1_Vestry.Road                                         <dbl> 0, …
## $ Address_line_1_Vicars.Lane                                         <dbl> 0, …
## $ Address_line_1_Victoria.Road                                       <dbl> 0, …
## $ Address_line_1_Victoria.Station                                    <dbl> 0, …
## $ Address_line_1_Village.Hall                                        <dbl> 0, …
## $ Address_line_1_War.Memorial.Building                               <dbl> 0, …
## $ Address_line_1_War.Memorial.Gardens                                <dbl> 0, …
## $ Address_line_1_Wardown.Park                                        <dbl> 0, …
## $ Address_line_1_Warhammer.World                                     <dbl> 0, …
## $ Address_line_1_Warwickshire.County.Cricket.Club                    <dbl> 0, …
## $ Address_line_1_Welbeck.Estate                                      <dbl> 0, …
## $ Address_line_1_Wellington.Mill                                     <dbl> 0, …
## $ Address_line_1_West.High.Down                                      <dbl> 0, …
## $ Address_line_1_West.Hoathly                                        <dbl> 0, …
## $ Address_line_1_West.Strand                                         <dbl> 0, …
## $ Address_line_1_Westminster.Synagogue                               <dbl> 0, …
## $ Address_line_1_Whisby.Road                                         <dbl> 0, …
## $ Address_line_1_Whitburn.Library                                    <dbl> 0, …
## $ Address_line_1_White.Lodge                                         <dbl> 0, …
## $ Address_line_1_Whitefriars                                         <dbl> 0, …
## $ Address_line_1_Wightwick.Bank                                      <dbl> 0, …
## $ Address_line_1_Willenhall.Library                                  <dbl> 0, …
## $ Address_line_1_Willesden.Green.Library.Centre                      <dbl> 0, …
## $ Address_line_1_Willowburn.Trading.Estate                           <dbl> 0, …
## $ Address_line_1_Willows.and.Wetlands.Visitor.Centre                 <dbl> 0, …
## $ Address_line_1_Wills.Memorial.Building                             <dbl> 0, …
## $ Address_line_1_Winchester.Road                                     <dbl> 0, …
## $ Address_line_1_Winthorpe.Airfield                                  <dbl> 0, …
## $ Address_line_1_Woodhorn.Village                                    <dbl> 0, …
## $ Address_line_1_Wookey.Hole                                         <dbl> 0, …
## $ Address_line_1_Worcestershire.Royal.Hospital                       <dbl> 0, …
## $ Address_line_1_Worsbrough.Bridge                                   <dbl> 0, …
## $ Address_line_1_Yanmouth                                            <dbl> 0, …
## $ Address_line_1_York.Avenue                                         <dbl> 0, …
## $ Address_line_1_Ysgubor.Lleucu                                      <dbl> 0, …
## $ Address_line_1_new                                                 <dbl> 0, …
## $ Governance_Government.Local_Authority                              <dbl> 0, …
## $ Governance_Government.National                                     <dbl> 0, …
## $ Governance_Government.Other                                        <dbl> 0, …
## $ Governance_Independent.English_Heritage                            <dbl> 0, …
## $ Governance_Independent.Historic_Environment_Scotland               <dbl> 0, …
## $ Governance_Independent.National_Trust                              <dbl> 0, …
## $ Governance_Independent.National_Trust_for_Scotland                 <dbl> 0, …
## $ Governance_Independent.Not_for_profit                              <dbl> 1, …
## $ Governance_Independent.Private                                     <dbl> 0, …
## $ Governance_Independent.Unknown                                     <dbl> 0, …
## $ Governance_University                                              <dbl> 0, …
## $ Governance_Unknown                                                 <dbl> 0, …
## $ Governance_new                                                     <dbl> 0, …
## $ Size_large                                                         <dbl> 0, …
## $ Size_medium                                                        <dbl> 0, …
## $ Size_small                                                         <dbl> 1, …
## $ Size_unknown                                                       <dbl> 0, …
## $ Size_new                                                           <dbl> 0, …
## $ Subject_Matter_Archaeology.Greek_and_Egyptian                      <dbl> 0, …
## $ Subject_Matter_Archaeology.Medieval                                <dbl> 0, …
## $ Subject_Matter_Archaeology.Mixed                                   <dbl> 0, …
## $ Subject_Matter_Archaeology.Other                                   <dbl> 0, …
## $ Subject_Matter_Archaeology.Prehistory                              <dbl> 0, …
## $ Subject_Matter_Archaeology.Roman                                   <dbl> 0, …
## $ Subject_Matter_Arts.Ceramics                                       <dbl> 0, …
## $ Subject_Matter_Arts.Costume_and_textiles                           <dbl> 0, …
## $ Subject_Matter_Arts.Crafts                                         <dbl> 0, …
## $ Subject_Matter_Arts.Design                                         <dbl> 0, …
## $ Subject_Matter_Arts.Fine_and_decorative_arts                       <dbl> 0, …
## $ Subject_Matter_Arts.Glass                                          <dbl> 0, …
## $ Subject_Matter_Arts.Literature                                     <dbl> 0, …
## $ Subject_Matter_Arts.Music                                          <dbl> 0, …
## $ Subject_Matter_Arts.Other                                          <dbl> 0, …
## $ Subject_Matter_Arts.Photography                                    <dbl> 0, …
## $ Subject_Matter_Belief_and_identity                                 <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Church_treasuries               <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Ethnic_group                    <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Freemasons                      <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Other                           <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Religion                        <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Religious_buildings             <dbl> 0, …
## $ Subject_Matter_Buildings.Civic                                     <dbl> 0, …
## $ Subject_Matter_Buildings.Houses.Large_houses                       <dbl> 0, …
## $ Subject_Matter_Buildings.Houses.Medium_houses                      <dbl> 0, …
## $ Subject_Matter_Buildings.Houses.Small_houses                       <dbl> 0, …
## $ Subject_Matter_Buildings.Other                                     <dbl> 0, …
## $ Subject_Matter_Buildings.Palace                                    <dbl> 0, …
## $ Subject_Matter_Buildings.Penal                                     <dbl> 0, …
## $ Subject_Matter_Buildings.School                                    <dbl> 0, …
## $ Subject_Matter_Buildings.Shops                                     <dbl> 0, …
## $ Subject_Matter_Communications                                      <dbl> 0, …
## $ Subject_Matter_Communications.Other                                <dbl> 0, …
## $ Subject_Matter_Communications.Post                                 <dbl> 0, …
## $ Subject_Matter_Communications.Radio                                <dbl> 0, …
## $ Subject_Matter_Food_and_drink                                      <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Clocks_and_watches         <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Industrial_life            <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Metals                     <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Mining_and_quarrying       <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Mixed                      <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Other                      <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Potteries                  <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Print                      <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Steam_and_engines          <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Textiles                   <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Cricket                           <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Fairgrounds_and_amusements        <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Film_Cinema_and_TV                <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Other                             <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Rugby_and_football                <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Toys_and_models                   <dbl> 0, …
## $ Subject_Matter_Local_Histories                                     <dbl> 0, …
## $ Subject_Matter_Medicine_and_health.Hospital                        <dbl> 0, …
## $ Subject_Matter_Medicine_and_health.Other                           <dbl> 0, …
## $ Subject_Matter_Medicine_and_health.Professional_association        <dbl> 0, …
## $ Subject_Matter_Mixed.Bygones                                       <dbl> 0, …
## $ Subject_Matter_Mixed.Encyclopaedic                                 <dbl> 0, …
## $ Subject_Matter_Mixed.Other                                         <dbl> 0, …
## $ Subject_Matter_Natural_world                                       <dbl> 0, …
## $ Subject_Matter_Natural_world.Dinosaurs                             <dbl> 1, …
## $ Subject_Matter_Natural_world.Fossils                               <dbl> 0, …
## $ Subject_Matter_Natural_world.Geology                               <dbl> 0, …
## $ Subject_Matter_Natural_world.Herbaria_and_gardening                <dbl> 0, …
## $ Subject_Matter_Natural_world.Mixed                                 <dbl> 0, …
## $ Subject_Matter_Natural_world.Other                                 <dbl> 0, …
## $ Subject_Matter_Natural_world.Zoology                               <dbl> 0, …
## $ Subject_Matter_Other                                               <dbl> 0, …
## $ Subject_Matter_Personality.Art                                     <dbl> 0, …
## $ Subject_Matter_Personality.Explorer                                <dbl> 0, …
## $ Subject_Matter_Personality.Literary                                <dbl> 0, …
## $ Subject_Matter_Personality.Music                                   <dbl> 0, …
## $ Subject_Matter_Personality.Other                                   <dbl> 0, …
## $ Subject_Matter_Personality.Political                               <dbl> 0, …
## $ Subject_Matter_Personality.Religious                               <dbl> 0, …
## $ Subject_Matter_Personality.Scientific                              <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Farming                              <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Forges                               <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Other                                <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Rural_life                           <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Textiles                             <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Watermills                           <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Windmills                            <dbl> 0, …
## $ Subject_Matter_Science_and_technology.Computing_and_gaming         <dbl> 0, …
## $ Subject_Matter_Science_and_technology.Other                        <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Boats_and_ships                   <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Fishing                           <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Lighthouses                       <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Mixed                             <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Other                             <dbl> 0, …
## $ Subject_Matter_Services.Fire                                       <dbl> 0, …
## $ Subject_Matter_Services.Other                                      <dbl> 0, …
## $ Subject_Matter_Services.Police                                     <dbl> 0, …
## $ Subject_Matter_Services.RNLI                                       <dbl> 0, …
## $ Subject_Matter_Transport.Aviation                                  <dbl> 0, …
## $ Subject_Matter_Transport.Bicycles                                  <dbl> 0, …
## $ Subject_Matter_Transport.Buses_and_trams                           <dbl> 0, …
## $ Subject_Matter_Transport.Canals                                    <dbl> 0, …
## $ Subject_Matter_Transport.Cars_and_motorbikes                       <dbl> 0, …
## $ Subject_Matter_Transport.Mixed                                     <dbl> 0, …
## $ Subject_Matter_Transport.Other                                     <dbl> 0, …
## $ Subject_Matter_Transport.Trains_and_railways                       <dbl> 0, …
## $ Subject_Matter_Utilities.Gas_and_electricity                       <dbl> 0, …
## $ Subject_Matter_Utilities.Water_and_waste                           <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Airforce                           <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Bunker                             <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Castles_and_forts                  <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Event_or_site                      <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Military                           <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Navy                               <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Other                              <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Regiment                           <dbl> 0, …
## $ Subject_Matter_new                                                 <dbl> 0, …
## $ Year_opened_X1759                                                  <dbl> 0, …
## $ Year_opened_X1761                                                  <dbl> 0, …
## $ Year_opened_X1816                                                  <dbl> 0, …
## $ Year_opened_X1823                                                  <dbl> 0, …
## $ Year_opened_X1829                                                  <dbl> 0, …
## $ Year_opened_X1835                                                  <dbl> 0, …
## $ Year_opened_X1837                                                  <dbl> 0, …
## $ Year_opened_X1845                                                  <dbl> 0, …
## $ Year_opened_X1846                                                  <dbl> 0, …
## $ Year_opened_X1847                                                  <dbl> 0, …
## $ Year_opened_X1850                                                  <dbl> 0, …
## $ Year_opened_X1852                                                  <dbl> 0, …
## $ Year_opened_X1857                                                  <dbl> 0, …
## $ Year_opened_X1859                                                  <dbl> 0, …
## $ Year_opened_X1860                                                  <dbl> 0, …
## $ Year_opened_X1864                                                  <dbl> 0, …
## $ Year_opened_X1866                                                  <dbl> 0, …
## $ Year_opened_X1867                                                  <dbl> 0, …
## $ Year_opened_X1868                                                  <dbl> 0, …
## $ Year_opened_X1869                                                  <dbl> 0, …
## $ Year_opened_X1876                                                  <dbl> 0, …
## $ Year_opened_X1878                                                  <dbl> 0, …
## $ Year_opened_X1880                                                  <dbl> 0, …
## $ Year_opened_X1881                                                  <dbl> 0, …
## $ Year_opened_X1883                                                  <dbl> 0, …
## $ Year_opened_X1884                                                  <dbl> 0, …
## $ Year_opened_X1885                                                  <dbl> 0, …
## $ Year_opened_X1886                                                  <dbl> 0, …
## $ Year_opened_X1888                                                  <dbl> 0, …
## $ Year_opened_X1889                                                  <dbl> 0, …
## $ Year_opened_X1890                                                  <dbl> 0, …
## $ Year_opened_X1892                                                  <dbl> 0, …
## $ Year_opened_X1893                                                  <dbl> 0, …
## $ Year_opened_X1895                                                  <dbl> 0, …
## $ Year_opened_X1896                                                  <dbl> 0, …
## $ Year_opened_X1898                                                  <dbl> 0, …
## $ Year_opened_X1899                                                  <dbl> 0, …
## $ Year_opened_X1900                                                  <dbl> 0, …
## $ Year_opened_X1901                                                  <dbl> 0, …
## $ Year_opened_X1902                                                  <dbl> 0, …
## $ Year_opened_X1905                                                  <dbl> 0, …
## $ Year_opened_X1906                                                  <dbl> 0, …
## $ Year_opened_X1907                                                  <dbl> 0, …
## $ Year_opened_X1908                                                  <dbl> 0, …
## $ Year_opened_X1911                                                  <dbl> 0, …
## $ Year_opened_X1912                                                  <dbl> 0, …
## $ Year_opened_X1913                                                  <dbl> 0, …
## $ Year_opened_X1914                                                  <dbl> 0, …
## $ Year_opened_X1915                                                  <dbl> 0, …
## $ Year_opened_X1916                                                  <dbl> 0, …
## $ Year_opened_X1917                                                  <dbl> 0, …
## $ Year_opened_X1919                                                  <dbl> 0, …
## $ Year_opened_X1920                                                  <dbl> 0, …
## $ Year_opened_X1922                                                  <dbl> 0, …
## $ Year_opened_X1923                                                  <dbl> 0, …
## $ Year_opened_X1924                                                  <dbl> 0, …
## $ Year_opened_X1925                                                  <dbl> 0, …
## $ Year_opened_X1926                                                  <dbl> 0, …
## $ Year_opened_X1927                                                  <dbl> 0, …
## $ Year_opened_X1928                                                  <dbl> 0, …
## $ Year_opened_X1929                                                  <dbl> 0, …
## $ Year_opened_X1930                                                  <dbl> 0, …
## $ Year_opened_X1931                                                  <dbl> 0, …
## $ Year_opened_X1932                                                  <dbl> 0, …
## $ Year_opened_X1933                                                  <dbl> 0, …
## $ Year_opened_X1934                                                  <dbl> 0, …
## $ Year_opened_X1935                                                  <dbl> 0, …
## $ Year_opened_X1936                                                  <dbl> 0, …
## $ Year_opened_X1937                                                  <dbl> 0, …
## $ Year_opened_X1938                                                  <dbl> 0, …
## $ Year_opened_X1941                                                  <dbl> 0, …
## $ Year_opened_X1942                                                  <dbl> 0, …
## $ Year_opened_X1944                                                  <dbl> 0, …
## $ Year_opened_X1945                                                  <dbl> 0, …
## $ Year_opened_X1946                                                  <dbl> 0, …
## $ Year_opened_X1947                                                  <dbl> 0, …
## $ Year_opened_X1948                                                  <dbl> 0, …
## $ Year_opened_X1949                                                  <dbl> 0, …
## $ Year_opened_X1950                                                  <dbl> 0, …
## $ Year_opened_X1951                                                  <dbl> 0, …
## $ Year_opened_X1952                                                  <dbl> 0, …
## $ Year_opened_X1953                                                  <dbl> 0, …
## $ Year_opened_X1954                                                  <dbl> 0, …
## $ Year_opened_X1955                                                  <dbl> 0, …
## $ Year_opened_X1956                                                  <dbl> 0, …
## $ Year_opened_X1957                                                  <dbl> 0, …
## $ Year_opened_X1958                                                  <dbl> 0, …
## $ Year_opened_X1959                                                  <dbl> 0, …
## $ Year_opened_X1960                                                  <dbl> 0, …
## $ Year_opened_X1961                                                  <dbl> 0, …
## $ Year_opened_X1962                                                  <dbl> 0, …
## $ Year_opened_X1963                                                  <dbl> 0, …
## $ Year_opened_X1964                                                  <dbl> 0, …
## $ Year_opened_X1965                                                  <dbl> 0, …
## $ Year_opened_X1966                                                  <dbl> 0, …
## $ Year_opened_X1967                                                  <dbl> 0, …
## $ Year_opened_X1968                                                  <dbl> 0, …
## $ Year_opened_X1969                                                  <dbl> 0, …
## $ Year_opened_X1970                                                  <dbl> 0, …
## $ Year_opened_X1971                                                  <dbl> 0, …
## $ Year_opened_X1972                                                  <dbl> 0, …
## $ Year_opened_X1973                                                  <dbl> 0, …
## $ Year_opened_X1974                                                  <dbl> 0, …
## $ Year_opened_X1975                                                  <dbl> 0, …
## $ Year_opened_X1976                                                  <dbl> 0, …
## $ Year_opened_X1977                                                  <dbl> 0, …
## $ Year_opened_X1978                                                  <dbl> 0, …
## $ Year_opened_X1979                                                  <dbl> 0, …
## $ Year_opened_X1980                                                  <dbl> 0, …
## $ Year_opened_X1981                                                  <dbl> 0, …
## $ Year_opened_X1982                                                  <dbl> 0, …
## $ Year_opened_X1983                                                  <dbl> 0, …
## $ Year_opened_X1984                                                  <dbl> 0, …
## $ Year_opened_X1985                                                  <dbl> 0, …
## $ Year_opened_X1986                                                  <dbl> 0, …
## $ Year_opened_X1987                                                  <dbl> 0, …
## $ Year_opened_X1988                                                  <dbl> 0, …
## $ Year_opened_X1989                                                  <dbl> 0, …
## $ Year_opened_X1990                                                  <dbl> 0, …
## $ Year_opened_X1991                                                  <dbl> 0, …
## $ Year_opened_X1992                                                  <dbl> 0, …
## $ Year_opened_X1993                                                  <dbl> 0, …
## $ Year_opened_X1994                                                  <dbl> 0, …
## $ Year_opened_X1995                                                  <dbl> 0, …
## $ Year_opened_X1996                                                  <dbl> 0, …
## $ Year_opened_X1997                                                  <dbl> 0, …
## $ Year_opened_X1998                                                  <dbl> 0, …
## $ Year_opened_X1999                                                  <dbl> 0, …
## $ Year_opened_X2000                                                  <dbl> 0, …
## $ Year_opened_X2001                                                  <dbl> 0, …
## $ Year_opened_X2002                                                  <dbl> 0, …
## $ Year_opened_X2003                                                  <dbl> 0, …
## $ Year_opened_X2004                                                  <dbl> 0, …
## $ Year_opened_X2005                                                  <dbl> 0, …
## $ Year_opened_X2006                                                  <dbl> 0, …
## $ Year_opened_X2007                                                  <dbl> 0, …
## $ Year_opened_X2008                                                  <dbl> 0, …
## $ Year_opened_X2009                                                  <dbl> 0, …
## $ Year_opened_X2010                                                  <dbl> 0, …
## $ Year_opened_X2011                                                  <dbl> 0, …
## $ Year_opened_X2012                                                  <dbl> 0, …
## $ Year_opened_X2013                                                  <dbl> 1, …
## $ Year_opened_X2014                                                  <dbl> 0, …
## $ Year_opened_X2015                                                  <dbl> 0, …
## $ Year_opened_X2016                                                  <dbl> 0, …
## $ Year_opened_X2017                                                  <dbl> 0, …
## $ Year_opened_X2018                                                  <dbl> 0, …
## $ Year_opened_X2019                                                  <dbl> 0, …
## $ Year_opened_X2020                                                  <dbl> 0, …
## $ Year_opened_X2021                                                  <dbl> 0, …
## $ Year_opened_new                                                    <dbl> 0, …
## $ Area_Deprivation_index_X2                                          <dbl> 0, …
## $ Area_Deprivation_index_X3                                          <dbl> 0, …
## $ Area_Deprivation_index_X4                                          <dbl> 1, …
## $ Area_Deprivation_index_X5                                          <dbl> 0, …
## $ Area_Deprivation_index_X6                                          <dbl> 0, …
## $ Area_Deprivation_index_X7                                          <dbl> 0, …
## $ Area_Deprivation_index_X8                                          <dbl> 0, …
## $ Area_Deprivation_index_X9                                          <dbl> 0, …
## $ Area_Deprivation_index_X10                                         <dbl> 0, …
## $ Area_Deprivation_index_new                                         <dbl> 0, …
## $ Area_Deprivation_index_crime_X2                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X3                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X4                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X5                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X6                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X7                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X8                                    <dbl> 0, …
## $ Area_Deprivation_index_crime_X9                                    <dbl> 1, …
## $ Area_Deprivation_index_crime_X10                                   <dbl> 0, …
## $ Area_Deprivation_index_crime_new                                   <dbl> 0, …
## $ Area_Deprivation_index_education_X2                                <dbl> 0, …
## $ Area_Deprivation_index_education_X3                                <dbl> 0, …
## $ Area_Deprivation_index_education_X4                                <dbl> 0, …
## $ Area_Deprivation_index_education_X5                                <dbl> 0, …
## $ Area_Deprivation_index_education_X6                                <dbl> 1, …
## $ Area_Deprivation_index_education_X7                                <dbl> 0, …
## $ Area_Deprivation_index_education_X8                                <dbl> 0, …
## $ Area_Deprivation_index_education_X9                                <dbl> 0, …
## $ Area_Deprivation_index_education_X10                               <dbl> 0, …
## $ Area_Deprivation_index_education_new                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X2                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X3                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X4                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X5                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X6                               <dbl> 1, …
## $ Area_Deprivation_index_employment_X7                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X8                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X9                               <dbl> 0, …
## $ Area_Deprivation_index_employment_X10                              <dbl> 0, …
## $ Area_Deprivation_index_employment_new                              <dbl> 0, …
## $ Area_Deprivation_index_health_X2                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X3                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X4                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X5                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X6                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X7                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X8                                   <dbl> 1, …
## $ Area_Deprivation_index_health_X9                                   <dbl> 0, …
## $ Area_Deprivation_index_health_X10                                  <dbl> 0, …
## $ Area_Deprivation_index_health_new                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X2                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X3                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X4                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X5                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X6                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X7                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X8                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X9                                  <dbl> 0, …
## $ Area_Deprivation_index_housing_X10                                 <dbl> 0, …
## $ Area_Deprivation_index_housing_new                                 <dbl> 0, …
## $ Area_Deprivation_index_income_X2                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X3                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X4                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X5                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X6                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X7                                   <dbl> 1, …
## $ Area_Deprivation_index_income_X8                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X9                                   <dbl> 0, …
## $ Area_Deprivation_index_income_X10                                  <dbl> 0, …
## $ Area_Deprivation_index_income_new                                  <dbl> 0, …
## $ Area_Deprivation_index_services_X2                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X3                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X4                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X5                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X6                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X7                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X8                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X9                                 <dbl> 0, …
## $ Area_Deprivation_index_services_X10                                <dbl> 0, …
## $ Area_Deprivation_index_services_new                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X1br                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X2ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X2br                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X3ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X3br                                <dbl> 1, …
## $ Area_Geodemographic_group_code_X3cr                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X4ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X5ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X6ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X6br                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X7ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X7br                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X7cr                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X8ar                                <dbl> 0, …
## $ Area_Geodemographic_group_code_X8br                                <dbl> 0, …
## $ Area_Geodemographic_group_code_new                                 <dbl> 0, …

Specify model

xgboost_spec <- 
  boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
    loss_reduction = tune(), sample_size = tune()) %>% 
  set_mode("classification") %>% 
  set_engine("xgboost") 

xgboost_workflow <- 
  workflow() %>% 
  add_recipe(xgboost_recipe) %>% 
  add_model(xgboost_spec) 

Tune hyperparameters

doParallel::registerDoParallel()

set.seed(17375)
xgboost_tune <-
  tune_grid(xgboost_workflow,
            resamples = data_cv,
            grid = 5,
            control = control_grid(save_pred = TRUE))
## Warning: package 'xgboost' was built under R version 4.3.3

Model evaluation

Identify optimal values for hyperparameters

collect_metrics(xgboost_tune)
## # A tibble: 15 × 12
##    trees min_n tree_depth learn_rate loss_reduction sample_size .metric    
##    <int> <int>      <int>      <dbl>          <dbl>       <dbl> <chr>      
##  1   677     7         14    0.00195       5.18e- 4       0.477 accuracy   
##  2   677     7         14    0.00195       5.18e- 4       0.477 brier_class
##  3   677     7         14    0.00195       5.18e- 4       0.477 roc_auc    
##  4  1016    11          4    0.00394       3.13e-10       0.723 accuracy   
##  5  1016    11          4    0.00394       3.13e-10       0.723 brier_class
##  6  1016    11          4    0.00394       3.13e-10       0.723 roc_auc    
##  7  1626    19          8    0.0202        2.06e- 7       0.257 accuracy   
##  8  1626    19          8    0.0202        2.06e- 7       0.257 brier_class
##  9  1626    19          8    0.0202        2.06e- 7       0.257 roc_auc    
## 10  1483    30          5    0.0873        5.18e- 3       0.401 accuracy   
## 11  1483    30          5    0.0873        5.18e- 3       0.401 brier_class
## 12  1483    30          5    0.0873        5.18e- 3       0.401 roc_auc    
## 13   111    39         12    0.238         4.91e- 1       0.850 accuracy   
## 14   111    39         12    0.238         4.91e- 1       0.850 brier_class
## 15   111    39         12    0.238         4.91e- 1       0.850 roc_auc    
## # ℹ 5 more variables: .estimator <chr>, mean <dbl>, n <int>, std_err <dbl>,
## #   .config <chr>
collect_predictions(xgboost_tune) %>%
    group_by(id) %>%
    roc_curve(Accreditation, .pred_Yes) %>%
    autoplot()

Fit the model for the last time

xgboost_last <- xgboost_workflow %>%
    finalize_workflow(select_best(xgboost_tune, metric = "accuracy")) %>%
    last_fit(data_split)
## New names:
## New names:
## • `Address_line_1_Units.1...2` -> `Address_line_1_Units.1`
collect_metrics(xgboost_last)
## # A tibble: 3 × 4
##   .metric     .estimator .estimate .config             
##   <chr>       <chr>          <dbl> <chr>               
## 1 accuracy    binary         0.693 Preprocessor1_Model1
## 2 roc_auc     binary         0.773 Preprocessor1_Model1
## 3 brier_class binary         0.193 Preprocessor1_Model1
collect_predictions((xgboost_last)) %>%
    #mutate(.pred_Accredited = as.factor(.pred_Accredited)) %>%
    yardstick::conf_mat(Accreditation, .pred_class) %>%
    autoplot()

Variable Importance

library(vip)
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
xgboost_last %>%
    workflows::extract_fit_engine() %>%
    vip()