Goal is to predict attrition, employees who are likely to leave the company.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(correlationfunnel)
## ══ correlationfunnel Tip #3 ════════════════════════════════════════════════════
## Using `binarize()` with data containing many columns or many rows can increase dimensionality substantially.
## Try subsetting your data column-wise or row-wise to avoid creating too many columns.
## You can always make a big problem smaller by sampling. :)
library(dplyr)
data <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv')
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skimr::skim(data)
Name | data |
Number of rows | 4191 |
Number of columns | 35 |
_______________________ | |
Column type frequency: | |
character | 24 |
numeric | 11 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
museum_id | 0 | 1.00 | 8 | 15 | 0 | 4191 | 0 |
Name_of_museum | 0 | 1.00 | 3 | 76 | 0 | 4190 | 0 |
Address_line_1 | 441 | 0.89 | 3 | 61 | 0 | 3212 | 0 |
Address_line_2 | 2816 | 0.33 | 3 | 39 | 0 | 1167 | 0 |
Village,_Town_or_City | 4 | 1.00 | 3 | 24 | 0 | 1696 | 0 |
Postcode | 0 | 1.00 | 6 | 9 | 0 | 3918 | 0 |
Admin_area | 0 | 1.00 | 12 | 137 | 0 | 393 | 0 |
Accreditation | 0 | 1.00 | 10 | 12 | 0 | 2 | 0 |
Governance | 0 | 1.00 | 7 | 41 | 0 | 13 | 0 |
Size | 0 | 1.00 | 4 | 7 | 0 | 5 | 0 |
Size_provenance | 179 | 0.96 | 2 | 29 | 0 | 16 | 0 |
Subject_Matter | 0 | 1.00 | 5 | 45 | 0 | 114 | 0 |
Year_opened | 0 | 1.00 | 9 | 9 | 0 | 351 | 0 |
Year_closed | 0 | 1.00 | 9 | 9 | 0 | 170 | 0 |
DOMUS_Subject_Matter | 2788 | 0.33 | 5 | 27 | 0 | 21 | 0 |
Primary_provenance_of_data | 0 | 1.00 | 3 | 8 | 0 | 18 | 0 |
Identifier_used_in_primary_data_source | 2056 | 0.51 | 2 | 8 | 0 | 2134 | 0 |
Area_Geodemographic_group | 49 | 0.99 | 11 | 40 | 0 | 17 | 0 |
Area_Geodemographic_group_code | 49 | 0.99 | 3 | 3 | 0 | 16 | 0 |
Area_Geodemographic_subgroup | 49 | 0.99 | 12 | 39 | 0 | 25 | 0 |
Area_Geodemographic_subgroup_code | 49 | 0.99 | 4 | 4 | 0 | 24 | 0 |
Area_Geodemographic_supergroup | 49 | 0.99 | 16 | 39 | 0 | 8 | 0 |
Area_Geodemographic_supergroup_code | 49 | 0.99 | 2 | 2 | 0 | 8 | 0 |
Notes | 2980 | 0.29 | 12 | 751 | 0 | 956 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Latitude | 0 | 1.00 | 52.93 | 2.09 | 49.18 | 51.48 | 52.47 | 53.96 | 100.00 | ▇▁▁▁▁ |
Longitude | 0 | 1.00 | -1.96 | 1.84 | -8.09 | -3.10 | -1.87 | -0.48 | 1.76 | ▁▂▇▇▅ |
DOMUS_identifier | 2347 | 0.44 | 1303.45 | 1597.19 | 1.00 | 486.50 | 991.50 | 1470.25 | 7746.00 | ▇▂▁▁▁ |
Area_Deprivation_index | 49 | 0.99 | 5.44 | 2.48 | 1.00 | 4.00 | 5.00 | 7.00 | 10.00 | ▃▆▇▆▃ |
Area_Deprivation_index_crime | 49 | 0.99 | 5.43 | 3.07 | 1.00 | 3.00 | 6.00 | 8.00 | 10.00 | ▇▆▅▇▇ |
Area_Deprivation_index_education | 49 | 0.99 | 6.04 | 2.61 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▃▅▇▇▆ |
Area_Deprivation_index_employment | 49 | 0.99 | 6.08 | 2.76 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▅▆▇▇▇ |
Area_Deprivation_index_health | 49 | 0.99 | 6.02 | 2.82 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▅▆▆▇▇ |
Area_Deprivation_index_housing | 49 | 0.99 | 3.97 | 2.75 | 1.00 | 1.00 | 3.00 | 6.00 | 10.00 | ▇▅▃▂▂ |
Area_Deprivation_index_income | 49 | 0.99 | 5.99 | 2.62 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▃▆▇▇▆ |
Area_Deprivation_index_services | 49 | 0.99 | 4.78 | 3.01 | 1.00 | 2.00 | 4.00 | 7.00 | 10.00 | ▇▅▅▅▅ |
Issues with data * Missing values * Factors or numeric variables * Governance, Size, Subject_Matter, Area_Geodemographic_group, Area_Geodemographic_group_code, Area_Deprivation_index, Area_Deprivation_index_crime, Area_Deprivation_index_education, Area_Deprivation_index_employment, Area_Deprivation_index_health, Area_Deprivation_index_housing, Area_Deprivation_index_income, Area_Deprivation_index_services * Zero variance variables * Character variables: Convert them to numbers in recipe step * Unbalanced target variable: Accreditation * ID variable: museum_id
library(stringr)
data_clean <- data %>%
# Remove unnecessary columns
select(-c(Size_provenance, DOMUS_Subject_Matter, Year_closed, Primary_provenance_of_data,
Identifier_used_in_primary_data_source, Area_Geodemographic_subgroup,
Area_Geodemographic_subgroup_code, Area_Geodemographic_supergroup,
Area_Geodemographic_supergroup_code, Notes, Latitude, Longitude, DOMUS_identifier,
Address_line_2, Postcode, Admin_area, Area_Geodemographic_group, Name_of_museum)) %>%
# Remove rows with missing values
na.omit() %>%
# Change column name and adjust Years opened
rename("Village_Town_City" = "Village,_Town_or_City") %>%
mutate(Year_opened = str_sub(Year_opened, 1, 4)) %>%
# Convert selected columns to factors
mutate(across(c(Accreditation, Governance, Size, Subject_Matter,
Area_Geodemographic_group_code, Area_Deprivation_index,
Area_Deprivation_index_crime, Area_Deprivation_index_education,
Area_Deprivation_index_employment, Area_Deprivation_index_health,
Area_Deprivation_index_housing, Area_Deprivation_index_income,
Area_Deprivation_index_services), as.factor)) %>%
# Recode Accreditation
mutate(Accreditation = if_else(Accreditation == "Accredited", "Yes", "No"))
# Preview the cleaned data
glimpse(data_clean)
## Rows: 3,708
## Columns: 17
## $ museum_id <chr> "mm.New.1", "mm.domus.WM019", "mm.ai…
## $ Address_line_1 <chr> "1 Olympic Way", "Warwick College of…
## $ Village_Town_City <chr> "Belfast", "Moreton Morrell", "Chelt…
## $ Accreditation <chr> "No", "No", "Yes", "No", "No", "Yes"…
## $ Governance <fct> Independent-Not_for_profit, Governme…
## $ Size <fct> large, medium, medium, small, small,…
## $ Subject_Matter <fct> Sea_and_seafaring-Boats_and_ships, R…
## $ Year_opened <chr> "2012", "1984", "2013", "1996", "198…
## $ Area_Deprivation_index <fct> 2, 8, 8, 2, 6, 6, 5, 6, 3, 7, 5, 8, …
## $ Area_Deprivation_index_crime <fct> 3, 9, 10, 1, 10, 3, 1, 10, 1, 7, 10,…
## $ Area_Deprivation_index_education <fct> 1, 8, 7, 6, 8, 7, 7, 6, 4, 7, 6, 6, …
## $ Area_Deprivation_index_employment <fct> 2, 10, 7, 3, 7, 6, 6, 7, 2, 7, 6, 8,…
## $ Area_Deprivation_index_health <fct> 1, 8, 8, 2, 7, 8, 5, 7, 2, 9, 5, 8, …
## $ Area_Deprivation_index_housing <fct> 4, 5, 7, 1, 8, 9, 1, 7, 6, 7, 7, 5, …
## $ Area_Deprivation_index_income <fct> 5, 8, 8, 3, 5, 5, 7, 5, 3, 8, 6, 8, …
## $ Area_Deprivation_index_services <fct> 5, 1, 4, 4, 2, 3, 9, 1, 9, 1, 1, 2, …
## $ Area_Geodemographic_group_code <fct> 2ar, 3ar, 7ar, 5ar, 3cr, 6br, 2ar, 3…
Accredited vs. Unaccredited
data_clean %>% count(Accreditation)
## # A tibble: 2 × 2
## Accreditation n
## <chr> <int>
## 1 No 2040
## 2 Yes 1668
data_clean %>%
ggplot(aes(Accreditation)) +
geom_bar()
Most common Subjects
data_clean %>% count(Subject_Matter) %>% arrange(desc(n))
## # A tibble: 114 × 2
## Subject_Matter n
## <fct> <int>
## 1 Local_Histories 853
## 2 Buildings-Houses-Large_houses 200
## 3 Arts-Fine_and_decorative_arts 186
## 4 War_and_conflict-Regiment 128
## 5 Transport-Trains_and_railways 127
## 6 Mixed-Encyclopaedic 115
## 7 Personality-Literary 80
## 8 Mixed-Other 78
## 9 Other 71
## 10 Buildings-Houses-Medium_houses 67
## # ℹ 104 more rows
data_clean %>%
ggplot(aes(x= Subject_Matter)) +
geom_bar()
Years museums opened
data_clean %>%
ggplot(aes(Year_opened, Accreditation)) +
geom_count()
correlation plot
# Step 1: binarize
data_binarized <- data_clean %>%
select(-museum_id) %>%
binarize()
data_binarized %>% glimpse()
## Rows: 3,708
## Columns: 175
## $ Address_line_1__High_Street <dbl> 0, 0, …
## $ `Address_line_1__-OTHER` <dbl> 1, 1, …
## $ Village_Town_City__Edinburgh <dbl> 0, 0, …
## $ Village_Town_City__London <dbl> 0, 0, …
## $ `Village_Town_City__-OTHER` <dbl> 1, 1, …
## $ Accreditation__No <dbl> 1, 1, …
## $ Accreditation__Yes <dbl> 0, 0, …
## $ `Governance__Government-Local_Authority` <dbl> 0, 1, …
## $ `Governance__Government-National` <dbl> 0, 0, …
## $ `Governance__Independent-English_Heritage` <dbl> 0, 0, …
## $ `Governance__Independent-National_Trust` <dbl> 0, 0, …
## $ `Governance__Independent-Not_for_profit` <dbl> 1, 0, …
## $ `Governance__Independent-Private` <dbl> 0, 0, …
## $ `Governance__Independent-Unknown` <dbl> 0, 0, …
## $ Governance__University <dbl> 0, 0, …
## $ Governance__Unknown <dbl> 0, 0, …
## $ `Governance__-OTHER` <dbl> 0, 0, …
## $ Size__large <dbl> 1, 0, …
## $ Size__medium <dbl> 0, 1, …
## $ Size__small <dbl> 0, 0, …
## $ Size__unknown <dbl> 0, 0, …
## $ `Size__-OTHER` <dbl> 0, 0, …
## $ `Subject_Matter__Archaeology-Roman` <dbl> 0, 0, …
## $ `Subject_Matter__Arts-Fine_and_decorative_arts` <dbl> 0, 0, …
## $ `Subject_Matter__Buildings-Houses-Large_houses` <dbl> 0, 0, …
## $ `Subject_Matter__Buildings-Houses-Medium_houses` <dbl> 0, 0, …
## $ `Subject_Matter__Industry_and_manufacture-Mining_and_quarrying` <dbl> 0, 0, …
## $ `Subject_Matter__Leisure_and_sport-Toys_and_models` <dbl> 0, 0, …
## $ Subject_Matter__Local_Histories <dbl> 0, 0, …
## $ `Subject_Matter__Mixed-Encyclopaedic` <dbl> 0, 0, …
## $ `Subject_Matter__Mixed-Other` <dbl> 0, 0, …
## $ Subject_Matter__Other <dbl> 0, 0, …
## $ `Subject_Matter__Personality-Literary` <dbl> 0, 0, …
## $ `Subject_Matter__Rural_Industry-Farming` <dbl> 0, 1, …
## $ `Subject_Matter__Sea_and_seafaring-Boats_and_ships` <dbl> 1, 0, …
## $ `Subject_Matter__Sea_and_seafaring-Mixed` <dbl> 0, 0, …
## $ `Subject_Matter__Transport-Cars_and_motorbikes` <dbl> 0, 0, …
## $ `Subject_Matter__Transport-Trains_and_railways` <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Airforce` <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Castles_and_forts` <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Military` <dbl> 0, 0, …
## $ `Subject_Matter__War_and_conflict-Regiment` <dbl> 0, 0, …
## $ `Subject_Matter__-OTHER` <dbl> 0, 0, …
## $ Year_opened__1945 <dbl> 0, 0, …
## $ Year_opened__1960 <dbl> 0, 0, …
## $ Year_opened__1972 <dbl> 0, 0, …
## $ Year_opened__1973 <dbl> 0, 0, …
## $ Year_opened__1974 <dbl> 0, 0, …
## $ Year_opened__1975 <dbl> 0, 0, …
## $ Year_opened__1976 <dbl> 0, 0, …
## $ Year_opened__1977 <dbl> 0, 0, …
## $ Year_opened__1978 <dbl> 0, 0, …
## $ Year_opened__1979 <dbl> 0, 0, …
## $ Year_opened__1980 <dbl> 0, 0, …
## $ Year_opened__1981 <dbl> 0, 0, …
## $ Year_opened__1982 <dbl> 0, 0, …
## $ Year_opened__1983 <dbl> 0, 0, …
## $ Year_opened__1984 <dbl> 0, 1, …
## $ Year_opened__1985 <dbl> 0, 0, …
## $ Year_opened__1986 <dbl> 0, 0, …
## $ Year_opened__1987 <dbl> 0, 0, …
## $ Year_opened__1988 <dbl> 0, 0, …
## $ Year_opened__1989 <dbl> 0, 0, …
## $ Year_opened__1990 <dbl> 0, 0, …
## $ Year_opened__1991 <dbl> 0, 0, …
## $ Year_opened__1992 <dbl> 0, 0, …
## $ Year_opened__1993 <dbl> 0, 0, …
## $ Year_opened__1994 <dbl> 0, 0, …
## $ Year_opened__1995 <dbl> 0, 0, …
## $ Year_opened__1996 <dbl> 0, 0, …
## $ Year_opened__1997 <dbl> 0, 0, …
## $ Year_opened__1998 <dbl> 0, 0, …
## $ Year_opened__1999 <dbl> 0, 0, …
## $ Year_opened__2000 <dbl> 0, 0, …
## $ Year_opened__2002 <dbl> 0, 0, …
## $ Year_opened__2005 <dbl> 0, 0, …
## $ Year_opened__2012 <dbl> 1, 0, …
## $ Year_opened__2014 <dbl> 0, 0, …
## $ `Year_opened__-OTHER` <dbl> 0, 0, …
## $ Area_Deprivation_index__1 <dbl> 0, 0, …
## $ Area_Deprivation_index__2 <dbl> 1, 0, …
## $ Area_Deprivation_index__3 <dbl> 0, 0, …
## $ Area_Deprivation_index__4 <dbl> 0, 0, …
## $ Area_Deprivation_index__5 <dbl> 0, 0, …
## $ Area_Deprivation_index__6 <dbl> 0, 0, …
## $ Area_Deprivation_index__7 <dbl> 0, 0, …
## $ Area_Deprivation_index__8 <dbl> 0, 1, …
## $ Area_Deprivation_index__9 <dbl> 0, 0, …
## $ Area_Deprivation_index__10 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__1 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__2 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__3 <dbl> 1, 0, …
## $ Area_Deprivation_index_crime__4 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__5 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__8 <dbl> 0, 0, …
## $ Area_Deprivation_index_crime__9 <dbl> 0, 1, …
## $ Area_Deprivation_index_crime__10 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__1 <dbl> 1, 0, …
## $ Area_Deprivation_index_education__2 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__3 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__4 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__5 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__8 <dbl> 0, 1, …
## $ Area_Deprivation_index_education__9 <dbl> 0, 0, …
## $ Area_Deprivation_index_education__10 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__1 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__2 <dbl> 1, 0, …
## $ Area_Deprivation_index_employment__3 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__4 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__5 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__8 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__9 <dbl> 0, 0, …
## $ Area_Deprivation_index_employment__10 <dbl> 0, 1, …
## $ Area_Deprivation_index_health__1 <dbl> 1, 0, …
## $ Area_Deprivation_index_health__2 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__3 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__4 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__5 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__8 <dbl> 0, 1, …
## $ Area_Deprivation_index_health__9 <dbl> 0, 0, …
## $ Area_Deprivation_index_health__10 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__1 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__2 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__3 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__4 <dbl> 1, 0, …
## $ Area_Deprivation_index_housing__5 <dbl> 0, 1, …
## $ Area_Deprivation_index_housing__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__8 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__9 <dbl> 0, 0, …
## $ Area_Deprivation_index_housing__10 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__1 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__2 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__3 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__4 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__5 <dbl> 1, 0, …
## $ Area_Deprivation_index_income__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__8 <dbl> 0, 1, …
## $ Area_Deprivation_index_income__9 <dbl> 0, 0, …
## $ Area_Deprivation_index_income__10 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__1 <dbl> 0, 1, …
## $ Area_Deprivation_index_services__2 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__3 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__4 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__5 <dbl> 1, 0, …
## $ Area_Deprivation_index_services__6 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__7 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__8 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__9 <dbl> 0, 0, …
## $ Area_Deprivation_index_services__10 <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__1ar <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__1br <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__2ar <dbl> 1, 0, …
## $ Area_Geodemographic_group_code__2br <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__3ar <dbl> 0, 1, …
## $ Area_Geodemographic_group_code__3br <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__3cr <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__4ar <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__5ar <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__6ar <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__6br <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__7ar <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__7br <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__7cr <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__8ar <dbl> 0, 0, …
## $ Area_Geodemographic_group_code__8br <dbl> 0, 0, …
# Step 2: correlate
data_correlation <- data_binarized %>%
correlate(Accreditation__Yes)
data_correlation
## # A tibble: 175 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 Accreditation No -1
## 2 Accreditation Yes 1
## 3 Governance Independent-Private -0.353
## 4 Size small -0.326
## 5 Size medium 0.253
## 6 Size large 0.242
## 7 Governance Government-Local_Authority 0.221
## 8 Governance Independent-Unknown -0.202
## 9 Size unknown -0.166
## 10 Subject_Matter -OTHER -0.147
## # ℹ 165 more rows
# Step 3: Plot
data_correlation %>%
correlationfunnel::plot_correlation_funnel()
## Warning: ggrepel: 161 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom 1.0.5 ✔ rsample 1.2.1
## ✔ dials 1.2.1 ✔ tune 1.2.1
## ✔ infer 1.0.7 ✔ workflows 1.1.4
## ✔ modeldata 1.4.0 ✔ workflowsets 1.1.0
## ✔ parsnip 1.2.1 ✔ yardstick 1.3.1
## ✔ recipes 1.0.10
## Warning: package 'modeldata' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
set.seed(1234)
data_clean <- data_clean %>% sample_n(1000)
data_split <- initial_split(data_clean, strata = Accreditation)
data_train <- training(data_split)
data_test <- testing(data_split)
data_cv <- rsample::vfold_cv(data_train, strata = Accreditation)
data_cv
## # 10-fold cross-validation using stratification
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [674/75]> Fold01
## 2 <split [674/75]> Fold02
## 3 <split [674/75]> Fold03
## 4 <split [674/75]> Fold04
## 5 <split [674/75]> Fold05
## 6 <split [674/75]> Fold06
## 7 <split [674/75]> Fold07
## 8 <split [674/75]> Fold08
## 9 <split [674/75]> Fold09
## 10 <split [675/74]> Fold10
{r} # library(usemodels) # use_xgboost(like_count ~ ., data = data_train) #
skimr::skim(data_clean)
Name | data_clean |
Number of rows | 1000 |
Number of columns | 17 |
_______________________ | |
Column type frequency: | |
character | 5 |
factor | 12 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
museum_id | 0 | 1 | 8 | 15 | 0 | 1000 | 0 |
Address_line_1 | 0 | 1 | 4 | 52 | 0 | 925 | 0 |
Village_Town_City | 0 | 1 | 3 | 24 | 0 | 618 | 0 |
Accreditation | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
Year_opened | 0 | 1 | 4 | 4 | 0 | 161 | 0 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Governance | 0 | 1 | FALSE | 12 | Ind: 439, Gov: 251, Ind: 119, Ind: 40 |
Size | 0 | 1 | FALSE | 5 | sma: 553, med: 258, lar: 136, unk: 51 |
Subject_Matter | 0 | 1 | FALSE | 108 | Loc: 244, Art: 57, Bui: 50, War: 34 |
Area_Deprivation_index | 0 | 1 | FALSE | 10 | 6: 133, 4: 132, 5: 122, 7: 114 |
Area_Deprivation_index_crime | 0 | 1 | FALSE | 10 | 1: 162, 2: 118, 4: 102, 10: 99 |
Area_Deprivation_index_education | 0 | 1 | FALSE | 10 | 7: 146, 5: 116, 8: 113, 6: 111 |
Area_Deprivation_index_employment | 0 | 1 | FALSE | 10 | 10: 147, 5: 119, 7: 110, 4: 109 |
Area_Deprivation_index_health | 0 | 1 | FALSE | 10 | 10: 126, 5: 123, 8: 116, 7: 106 |
Area_Deprivation_index_housing | 0 | 1 | FALSE | 10 | 1: 261, 2: 150, 3: 127, 4: 86 |
Area_Deprivation_index_income | 0 | 1 | FALSE | 10 | 6: 127, 5: 121, 4: 119, 7: 115 |
Area_Deprivation_index_services | 0 | 1 | FALSE | 10 | 1: 166, 9: 107, 2: 106, 3: 99 |
Area_Geodemographic_group_code | 0 | 1 | FALSE | 16 | 3ar: 164, 2ar: 106, 3br: 102, 7ar: 96 |
library(themis)
library(recipes)
library(textrecipes)
xgboost_recipe <- recipes::recipe(Accreditation ~ ., data = data_train) %>%
update_role(museum_id, new_role = "ID") %>%
step_tokenize(Village_Town_City) %>%
step_tokenfilter(Village_Town_City, max_tokens = 50) %>%
step_tf(Village_Town_City) %>%
step_novel(all_nominal_predictors()) %>%
step_dummy(all_nominal_predictors()) %>%
step_smote(Accreditation)
xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## New names:
## • `Address_line_1_Units.1...2` -> `Address_line_1_Units.1`
## Rows: 818
## Columns: 1,135
## $ museum_id <fct> mm.…
## $ Accreditation <fct> No,…
## $ tf_Village_Town_City_aberdeen <dbl> 0, …
## $ tf_Village_Town_City_anstruther <dbl> 0, …
## $ tf_Village_Town_City_bath <dbl> 0, …
## $ tf_Village_Town_City_belfast <dbl> 0, …
## $ tf_Village_Town_City_birmingham <dbl> 0, …
## $ tf_Village_Town_City_brighton <dbl> 0, …
## $ tf_Village_Town_City_bristol <dbl> 0, …
## $ tf_Village_Town_City_bury <dbl> 0, …
## $ tf_Village_Town_City_by <dbl> 0, …
## $ tf_Village_Town_City_cambridge <dbl> 0, …
## $ tf_Village_Town_City_cardiff <dbl> 0, …
## $ tf_Village_Town_City_chester <dbl> 0, …
## $ tf_Village_Town_City_chichester <dbl> 0, …
## $ tf_Village_Town_City_colchester <dbl> 0, …
## $ tf_Village_Town_City_cowes <dbl> 0, …
## $ tf_Village_Town_City_doncaster <dbl> 0, …
## $ tf_Village_Town_City_dundee <dbl> 0, …
## $ tf_Village_Town_City_east <dbl> 0, …
## $ tf_Village_Town_City_eastbourne <dbl> 0, …
## $ tf_Village_Town_City_edinburgh <dbl> 0, …
## $ tf_Village_Town_City_edmunds <dbl> 0, …
## $ tf_Village_Town_City_exeter <dbl> 0, …
## $ tf_Village_Town_City_glasgow <dbl> 0, …
## $ tf_Village_Town_City_isle <dbl> 0, …
## $ tf_Village_Town_City_kings <dbl> 0, …
## $ tf_Village_Town_City_lancaster <dbl> 0, …
## $ tf_Village_Town_City_leeds <dbl> 0, …
## $ tf_Village_Town_City_leicester <dbl> 0, …
## $ tf_Village_Town_City_lerwick <dbl> 0, …
## $ tf_Village_Town_City_liverpool <dbl> 0, …
## $ tf_Village_Town_City_llangollen <dbl> 0, …
## $ tf_Village_Town_City_london <dbl> 0, …
## $ tf_Village_Town_City_lynn <dbl> 0, …
## $ tf_Village_Town_City_manchester <dbl> 0, …
## $ tf_Village_Town_City_milton <dbl> 0, …
## $ tf_Village_Town_City_newcastle <dbl> 0, …
## $ tf_Village_Town_City_newport <dbl> 1, …
## $ tf_Village_Town_City_north <dbl> 0, …
## $ tf_Village_Town_City_northampton <dbl> 0, …
## $ tf_Village_Town_City_norwich <dbl> 0, …
## $ tf_Village_Town_City_nr <dbl> 0, …
## $ tf_Village_Town_City_of <dbl> 0, …
## $ tf_Village_Town_City_on <dbl> 0, …
## $ tf_Village_Town_City_sheffield <dbl> 0, …
## $ tf_Village_Town_City_spalding <dbl> 0, …
## $ tf_Village_Town_City_st <dbl> 0, …
## $ tf_Village_Town_City_tyne <dbl> 0, …
## $ tf_Village_Town_City_upon <dbl> 0, …
## $ tf_Village_Town_City_wells <dbl> 0, …
## $ tf_Village_Town_City_york <dbl> 0, …
## $ Address_line_1_X1.Scala.St <dbl> 0, …
## $ Address_line_1_X1.Senhouse.Street <dbl> 0, …
## $ Address_line_1_X1.Warehouse <dbl> 0, …
## $ Address_line_1_X1.West.Street <dbl> 0, …
## $ Address_line_1_X1.3.Richmond.Road <dbl> 0, …
## $ Address_line_1_X10.Buxton.Rd <dbl> 0, …
## $ Address_line_1_X10.Stone.Buildings <dbl> 0, …
## $ Address_line_1_X100.Church.Street <dbl> 0, …
## $ Address_line_1_X100.London.Road <dbl> 0, …
## $ Address_line_1_X100.100A.Hackford.Road <dbl> 0, …
## $ Address_line_1_X107.High.Street <dbl> 0, …
## $ Address_line_1_X109.Head.Street <dbl> 0, …
## $ Address_line_1_X109A.High.Street <dbl> 0, …
## $ Address_line_1_X11.Spring.Hill <dbl> 0, …
## $ Address_line_1_X12.Crooms.Hill <dbl> 0, …
## $ Address_line_1_X12.13.New.Wharf.Road <dbl> 0, …
## $ Address_line_1_X126.High.Street <dbl> 0, …
## $ Address_line_1_X129.Muir.Street <dbl> 0, …
## $ Address_line_1_X13.Berkeley.Street <dbl> 0, …
## $ Address_line_1_X13.High.Street <dbl> 0, …
## $ Address_line_1_X134.Mount.Merrion.Avenue <dbl> 0, …
## $ Address_line_1_X136.Kingsland.Road <dbl> 0, …
## $ Address_line_1_X13a.Laporte.Precinct <dbl> 0, …
## $ Address_line_1_X14.High.Street <dbl> 0, …
## $ Address_line_1_X15.Lodge.Street <dbl> 0, …
## $ Address_line_1_X16.Bridge.Street <dbl> 0, …
## $ Address_line_1_X16.Rutland.Way <dbl> 0, …
## $ Address_line_1_X16.18.Queen.Square <dbl> 0, …
## $ Address_line_1_X169.High.Street <dbl> 0, …
## $ Address_line_1_X17.Donegall.Square.North <dbl> 0, …
## $ Address_line_1_X17.18.The.Hard <dbl> 0, …
## $ Address_line_1_X170.Salmon.Lane <dbl> 0, …
## $ Address_line_1_X19.Church.Rd <dbl> 0, …
## $ Address_line_1_X194.High.Street <dbl> 0, …
## $ Address_line_1_X194.Odmaston.Road <dbl> 0, …
## $ Address_line_1_X195.Aylestone.Road <dbl> 0, …
## $ Address_line_1_X2.Boneybefore.Road <dbl> 0, …
## $ Address_line_1_X2.Buddle.Street <dbl> 0, …
## $ Address_line_1_X2.Carlisle.Road <dbl> 0, …
## $ Address_line_1_X2.Castle.Street <dbl> 0, …
## $ Address_line_1_X2.Old.Bank <dbl> 0, …
## $ Address_line_1_X2.Victoria.Parade <dbl> 0, …
## $ Address_line_1_X20.Cornfield.Terrace <dbl> 0, …
## $ Address_line_1_X20.Maresfield.Gardens <dbl> 0, …
## $ Address_line_1_X20.Princes.Gate <dbl> 0, …
## $ Address_line_1_X21.South.Street <dbl> 0, …
## $ Address_line_1_X217.Wakeham <dbl> 0, …
## $ Address_line_1_X22.St.Georges.Road <dbl> 0, …
## $ Address_line_1_X224.228.Great.Portland.Street <dbl> 0, …
## $ Address_line_1_X224.238.Kensington.High.Street <dbl> 0, …
## $ Address_line_1_X23.27.High.Street <dbl> 0, …
## $ Address_line_1_X233.Bangor.Road <dbl> 0, …
## $ Address_line_1_X24.Cheyne.Row <dbl> 0, …
## $ Address_line_1_X24.Limes.Avenue <dbl> 0, …
## $ Address_line_1_X24.Main.Street <dbl> 0, …
## $ Address_line_1_X240.Swithland.Lane <dbl> 0, …
## $ Address_line_1_X25.Brook.Street <dbl> 0, …
## $ Address_line_1_X25.27.West.Street <dbl> 0, …
## $ Address_line_1_X250c.King.Road.Arches <dbl> 0, …
## $ Address_line_1_X257.Castle.St <dbl> 0, …
## $ Address_line_1_X268.Waldegrave.Road <dbl> 0, …
## $ Address_line_1_X27.Grosvenor.Street <dbl> 0, …
## $ Address_line_1_X27.Lower.Fore.Street <dbl> 0, …
## $ Address_line_1_X27.Lowther.Street <dbl> 0, …
## $ Address_line_1_X28.Plough.Hill <dbl> 0, …
## $ Address_line_1_X28.Scotts.Place <dbl> 0, …
## $ Address_line_1_X29.French.Street <dbl> 0, …
## $ Address_line_1_X3.Commercial.Street <dbl> 0, …
## $ Address_line_1_X3.East.Park.Road <dbl> 0, …
## $ Address_line_1_X3.East.Street <dbl> 0, …
## $ Address_line_1_X3.Main.Street <dbl> 0, …
## $ Address_line_1_X3.Shore.Road <dbl> 0, …
## $ Address_line_1_X3.West.Street <dbl> 0, …
## $ Address_line_1_X31.Wood.Street <dbl> 0, …
## $ Address_line_1_X320.Camp.Road <dbl> 0, …
## $ Address_line_1_X33.The.Square <dbl> 0, …
## $ Address_line_1_X346.High.Street <dbl> 0, …
## $ Address_line_1_X35.The.Meadows <dbl> 0, …
## $ Address_line_1_X35.Townfield.Gardens <dbl> 0, …
## $ Address_line_1_X36.High.Street <dbl> 0, …
## $ Address_line_1_X368.Cregagh.Road <dbl> 0, …
## $ Address_line_1_X39.Christchurch.Road <dbl> 0, …
## $ Address_line_1_X4.Buckland.Terrace <dbl> 0, …
## $ Address_line_1_X40.Southwark.Street <dbl> 0, …
## $ Address_line_1_X41.Devonshire.Place <dbl> 0, …
## $ Address_line_1_X42.High.Street <dbl> 0, …
## $ Address_line_1_X45.Market.Street <dbl> 0, …
## $ Address_line_1_X46.Clwyd.Street <dbl> 0, …
## $ Address_line_1_X46.Queen.Street <dbl> 0, …
## $ Address_line_1_X46.50.Copperfield.Road <dbl> 0, …
## $ Address_line_1_X5.6.St..Matthews.Street <dbl> 0, …
## $ Address_line_1_X5.7.Mandeville.Place <dbl> 0, …
## $ Address_line_1_X501.Wherstead.Road <dbl> 0, …
## $ Address_line_1_X51.Culver.Road <dbl> 0, …
## $ Address_line_1_X52.Alfred.Street <dbl> 0, …
## $ Address_line_1_X55.63.Hurst.Street <dbl> 0, …
## $ Address_line_1_X57a.Newton.Street <dbl> 0, …
## $ Address_line_1_X58a.High.Street <dbl> 0, …
## $ Address_line_1_X6.Angel.Hill <dbl> 0, …
## $ Address_line_1_X6.Chapel.Street <dbl> 0, …
## $ Address_line_1_X6.Church.Close <dbl> 0, …
## $ Address_line_1_X6.King.Street <dbl> 0, …
## $ Address_line_1_X6.The.Square <dbl> 0, …
## $ Address_line_1_X6.7.Queens.Terrace <dbl> 0, …
## $ Address_line_1_X60.62.Nelson.Street <dbl> 0, …
## $ Address_line_1_X61.Brook.Street <dbl> 0, …
## $ Address_line_1_X61.Chestergate <dbl> 0, …
## $ Address_line_1_X62.64.Marketgate <dbl> 0, …
## $ Address_line_1_X670.Warwick.Road <dbl> 0, …
## $ Address_line_1_X69.High.St <dbl> 0, …
## $ Address_line_1_X70.Fore.Street <dbl> 0, …
## $ Address_line_1_X74.High.Street <dbl> 0, …
## $ Address_line_1_X74.The.Headrow <dbl> 0, …
## $ Address_line_1_X75.79.Vyse.Street <dbl> 0, …
## $ Address_line_1_X78.High.Street <dbl> 0, …
## $ Address_line_1_X7A.The.Scores <dbl> 0, …
## $ Address_line_1_X8.Park.Street <dbl> 0, …
## $ Address_line_1_X8.10.Queen.Street <dbl> 0, …
## $ Address_line_1_X80b.The.Chase <dbl> 0, …
## $ Address_line_1_X9.Broad.Street <dbl> 0, …
## $ Address_line_1_X9.Holy.Bones <dbl> 0, …
## $ Address_line_1_X90.Buckingham.Palace.Road <dbl> 0, …
## $ Address_line_1_X92.Beacons.Park <dbl> 0, …
## $ Address_line_1_Abbey.Gate <dbl> 0, …
## $ Address_line_1_Abbeymill.Business.Centre <dbl> 0, …
## $ Address_line_1_Abbot.Hall <dbl> 0, …
## $ Address_line_1_Aberystwyth.Arts.Centre <dbl> 0, …
## $ Address_line_1_Abington.Park <dbl> 0, …
## $ Address_line_1_AGC.Centre <dbl> 0, …
## $ Address_line_1_All.Saints.Church <dbl> 0, …
## $ Address_line_1_All.Saints.Square <dbl> 0, …
## $ Address_line_1_Alumni.House <dbl> 0, …
## $ Address_line_1_American.Express.Stadium <dbl> 0, …
## $ Address_line_1_Annay.Road <dbl> 0, …
## $ Address_line_1_Anstruther.Harbour <dbl> 0, …
## $ Address_line_1_Apartment.4 <dbl> 0, …
## $ Address_line_1_Appletongate <dbl> 0, …
## $ Address_line_1_April.Cottage <dbl> 0, …
## $ Address_line_1_Arbroath.Library <dbl> 0, …
## $ Address_line_1_Arbuthnot.Museum <dbl> 0, …
## $ Address_line_1_Arundel.Gate <dbl> 0, …
## $ Address_line_1_Ash.Centre <dbl> 0, …
## $ Address_line_1_Ashby.Road <dbl> 0, …
## $ Address_line_1_Ashland <dbl> 0, …
## $ Address_line_1_Ashton.Town.Hall <dbl> 0, …
## $ Address_line_1_Ashworth.Barracks <dbl> 0, …
## $ Address_line_1_Avenue.House <dbl> 0, …
## $ Address_line_1_Aykley.Heads <dbl> 0, …
## $ Address_line_1_Baldwins.Crescent <dbl> 0, …
## $ Address_line_1_Ballbathie <dbl> 0, …
## $ Address_line_1_Bamburgh.Castle <dbl> 0, …
## $ Address_line_1_Bark.House.Lane <dbl> 0, …
## $ Address_line_1_Barkers.Factory.Complex <dbl> 0, …
## $ Address_line_1_Barr.St <dbl> 0, …
## $ Address_line_1_Barras.Street <dbl> 0, …
## $ Address_line_1_Barry.Island.Station <dbl> 0, …
## $ Address_line_1_Barton.Lane <dbl> 0, …
## $ Address_line_1_Barton.Meade.House <dbl> 0, …
## $ Address_line_1_Bashley.Manor <dbl> 0, …
## $ Address_line_1_Battle.Abbey <dbl> 0, …
## $ Address_line_1_Beach.of.Houbie <dbl> 0, …
## $ Address_line_1_Beacon.Street <dbl> 0, …
## $ Address_line_1_Beckford.Road <dbl> 0, …
## $ Address_line_1_Becks.Square <dbl> 0, …
## $ Address_line_1_Bedale.Hall <dbl> 0, …
## $ Address_line_1_Benenden.Hospital <dbl> 0, …
## $ Address_line_1_Birkenhead.Town.Hall <dbl> 0, …
## $ Address_line_1_Blackridge.Library <dbl> 0, …
## $ Address_line_1_Blacksmiths.Shop.Centre <dbl> 0, …
## $ Address_line_1_Blakesley.Road <dbl> 0, …
## $ Address_line_1_Blenheim.Palace.Sawmills <dbl> 0, …
## $ Address_line_1_Block.H <dbl> 0, …
## $ Address_line_1_Bod.of.Gremista <dbl> 0, …
## $ Address_line_1_Borough.Road <dbl> 0, …
## $ Address_line_1_Boyle.Street <dbl> 0, …
## $ Address_line_1_Brampton.Road <dbl> 0, …
## $ Address_line_1_Bridge.Street <dbl> 0, …
## $ Address_line_1_Broad.Street <dbl> 0, …
## $ Address_line_1_Broadclyst <dbl> 0, …
## $ Address_line_1_Brock.Barracks <dbl> 0, …
## $ Address_line_1_Bronte.Parsonage.Museum <dbl> 0, …
## $ Address_line_1_Brookend.Street <dbl> 0, …
## $ Address_line_1_Broomfield <dbl> 0, …
## $ Address_line_1_Bruce.Embankment <dbl> 0, …
## $ Address_line_1_Buile.Hill.Park <dbl> 0, …
## $ Address_line_1_Burnby.Hall.Gardens <dbl> 0, …
## $ Address_line_1_Burslem.Town.Hall <dbl> 0, …
## $ Address_line_1_Bushmead.Community.Centre <dbl> 0, …
## $ Address_line_1_Butchery.Lane <dbl> 0, …
## $ Address_line_1_Butterly.Station <dbl> 0, …
## $ Address_line_1_Byland <dbl> 0, …
## $ Address_line_1_c.o.7.Hospital.Lane <dbl> 0, …
## $ Address_line_1_c.o.Chambers.Institute <dbl> 0, …
## $ Address_line_1_c.o.Spennymoor.Town.Hall <dbl> 0, …
## $ Address_line_1_Callendar.Park <dbl> 0, …
## $ Address_line_1_Camp.Mill <dbl> 0, …
## $ Address_line_1_Canolfan.Iaith.Clwyd <dbl> 0, …
## $ Address_line_1_Canolfan.Thomas.Telford <dbl> 0, …
## $ Address_line_1_Canterbury.Christ.Church.University <dbl> 0, …
## $ Address_line_1_Cark.in.Cartmel <dbl> 0, …
## $ Address_line_1_Castle.Approach <dbl> 0, …
## $ Address_line_1_Castle.Campbell.Hall <dbl> 0, …
## $ Address_line_1_Castle.House <dbl> 0, …
## $ Address_line_1_Castle.Lane <dbl> 0, …
## $ Address_line_1_Castle.Street <dbl> 0, …
## $ Address_line_1_Castle.Wynd <dbl> 0, …
## $ Address_line_1_Castleford.Library <dbl> 0, …
## $ Address_line_1_Causeway.House <dbl> 0, …
## $ Address_line_1_Caverswall.Road.Station <dbl> 0, …
## $ Address_line_1_Central.Baptist.Church <dbl> 0, …
## $ Address_line_1_Central.House <dbl> 0, …
## $ Address_line_1_Central.Museum <dbl> 0, …
## $ Address_line_1_Chapel.Hill <dbl> 0, …
## $ Address_line_1_Chapel.Lane <dbl> 0, …
## $ Address_line_1_Charing.Cross <dbl> 0, …
## $ Address_line_1_Charleston <dbl> 0, …
## $ Address_line_1_Cheney.School <dbl> 0, …
## $ Address_line_1_Chesters.Roman.Fort <dbl> 0, …
## $ Address_line_1_Christ.Church <dbl> 0, …
## $ Address_line_1_Christchurch.Park <dbl> 0, …
## $ Address_line_1_Church.Bay <dbl> 0, …
## $ Address_line_1_Church.Chare <dbl> 0, …
## $ Address_line_1_Church.Hill <dbl> 0, …
## $ Address_line_1_Church.Lane <dbl> 0, …
## $ Address_line_1_Church.Point <dbl> 0, …
## $ Address_line_1_Church.Road <dbl> 0, …
## $ Address_line_1_Church.Square <dbl> 0, …
## $ Address_line_1_Church.Street <dbl> 0, …
## $ Address_line_1_Churchgate <dbl> 0, …
## $ Address_line_1_City.Hall <dbl> 0, …
## $ Address_line_1_Civic.Centre <dbl> 0, …
## $ Address_line_1_Claverton.Manor <dbl> 0, …
## $ Address_line_1_Clay.Lake <dbl> 0, …
## $ Address_line_1_Clifton.Dykes <dbl> 0, …
## $ Address_line_1_Clifton.Lane <dbl> 0, …
## $ Address_line_1_Clive.Steps <dbl> 0, …
## $ Address_line_1_Clynnog.Fawr <dbl> 0, …
## $ Address_line_1_Coalisland.Enterprise.Centre <dbl> 0, …
## $ Address_line_1_College.Hill <dbl> 0, …
## $ Address_line_1_Conifer.Cottage <dbl> 0, …
## $ Address_line_1_Corrigall <dbl> 0, …
## $ Address_line_1_Cotehele.Quay <dbl> 0, …
## $ Address_line_1_Cottage.Lane <dbl> 0, …
## $ Address_line_1_County.Park <dbl> 0, …
## $ Address_line_1_County.Sessions.House <dbl> 0, …
## $ Address_line_1_Court.7.Rear.of.Shakespeare.Public.House <dbl> 0, …
## $ Address_line_1_Cranmore.Railway.Station <dbl> 0, …
## $ Address_line_1_Crich <dbl> 0, …
## $ Address_line_1_Cromwell.Road <dbl> 0, …
## $ Address_line_1_Cross.Street <dbl> 0, …
## $ Address_line_1_Cunard.Building <dbl> 0, …
## $ Address_line_1_Custom.House <dbl> 0, …
## $ Address_line_1_Cusworth.Hall <dbl> 0, …
## $ Address_line_1_Cypress.Road <dbl> 0, …
## $ Address_line_1_Dalkeith.Hall <dbl> 0, …
## $ Address_line_1_Darby.Road <dbl> 0, …
## $ Address_line_1_Daws.House <dbl> 0, …
## $ Address_line_1_Defence.Intelligence...Security.Centre <dbl> 0, …
## $ Address_line_1_Dent <dbl> 0, …
## $ Address_line_1_Department.of.Earth.Sciences <dbl> 0, …
## $ Address_line_1_Department.of.Plant.Sciences <dbl> 0, …
## $ Address_line_1_Depot.Road <dbl> 0, …
## $ Address_line_1_Derwent.Valley.Visitor.Centre <dbl> 0, …
## $ Address_line_1_Derwent.Works <dbl> 0, …
## $ Address_line_1_Devonshire.Park <dbl> 0, …
## $ Address_line_1_Dick.Institute <dbl> 0, …
## $ Address_line_1_Dinosaur.Farm <dbl> 1, …
## $ Address_line_1_Dinting.Lane <dbl> 0, …
## $ Address_line_1_Discovery.Centre <dbl> 0, …
## $ Address_line_1_Discovery.Point <dbl> 0, …
## $ Address_line_1_Discovery.Road <dbl> 0, …
## $ Address_line_1_Dogdyke.Road <dbl> 0, …
## $ Address_line_1_Doncaster.Museum...Art.Gallery <dbl> 0, …
## $ Address_line_1_Dornoch.Heritage.Society.Trust <dbl> 0, …
## $ Address_line_1_Dre.Fach.Felindre <dbl> 0, …
## $ Address_line_1_Dundee.Road <dbl> 0, …
## $ Address_line_1_Dunrobin.Castle <dbl> 0, …
## $ Address_line_1_Dunster <dbl> 0, …
## $ Address_line_1_Duxford.Airfield <dbl> 0, …
## $ Address_line_1_Dyrham.Park <dbl> 0, …
## $ Address_line_1_Earlswood.Village.Hall <dbl> 0, …
## $ Address_line_1_East.Cottages <dbl> 0, …
## $ Address_line_1_East.Princes.Street.Gardens <dbl> 0, …
## $ Address_line_1_Easterbook <dbl> 0, …
## $ Address_line_1_Eden.Valley.Museum <dbl> 0, …
## $ Address_line_1_Edwards.Building <dbl> 0, …
## $ Address_line_1_Englesea.Brook.Lane <dbl> 0, …
## $ Address_line_1_Evelyn.Woods.Road <dbl> 0, …
## $ Address_line_1_Exchange.Street <dbl> 0, …
## $ Address_line_1_Exhibition.Square <dbl> 0, …
## $ Address_line_1_Falcon.Centre <dbl> 0, …
## $ Address_line_1_Fall.Barn.Road <dbl> 0, …
## $ Address_line_1_Field.Farm <dbl> 0, …
## $ Address_line_1_Filkins <dbl> 0, …
## $ Address_line_1_Fleece.Yard <dbl> 0, …
## $ Address_line_1_Fleet.Street <dbl> 0, …
## $ Address_line_1_Fochabers.Folk.Museum <dbl> 0, …
## $ Address_line_1_Ford.End.Farm <dbl> 0, …
## $ Address_line_1_Fore.Street <dbl> 0, …
## $ Address_line_1_Fort.Amherst <dbl> 0, …
## $ Address_line_1_Foundry.Street <dbl> 0, …
## $ Address_line_1_Framework.Knitters.Cottages <dbl> 0, …
## $ Address_line_1_Francis.Street <dbl> 0, …
## $ Address_line_1_Fraser.Noble.Building <dbl> 0, …
## $ Address_line_1_Freemasons.Hall <dbl> 0, …
## $ Address_line_1_Frogmore.Paper.Mill <dbl> 0, …
## $ Address_line_1_Furnace.Lane <dbl> 0, …
## $ Address_line_1_Galleries.and.Museums.Department <dbl> 0, …
## $ Address_line_1_Gallery.Walk <dbl> 0, …
## $ Address_line_1_Gasworks.Road <dbl> 0, …
## $ Address_line_1_George.Edward.Hall <dbl> 0, …
## $ Address_line_1_Gilfach.Ddu <dbl> 0, …
## $ Address_line_1_Glamorgan.Cricket <dbl> 0, …
## $ Address_line_1_Globe.House <dbl> 0, …
## $ Address_line_1_Gloucester.Court.Mews <dbl> 0, …
## $ Address_line_1_Godinton.House <dbl> 0, …
## $ Address_line_1_Gold.Street <dbl> 0, …
## $ Address_line_1_Gomshall.Lane <dbl> 0, …
## $ Address_line_1_Gossage.Building <dbl> 0, …
## $ Address_line_1_Grammar.School.Walk <dbl> 0, …
## $ Address_line_1_Great.Amwell <dbl> 0, …
## $ Address_line_1_Great.Hall.Complex <dbl> 0, …
## $ Address_line_1_Great.Pulteney.Street <dbl> 0, …
## $ Address_line_1_Great.Russell.Street <dbl> 0, …
## $ Address_line_1_Greaves.Street <dbl> 0, …
## $ Address_line_1_Green.End <dbl> 0, …
## $ Address_line_1_Gremista <dbl> 0, …
## $ Address_line_1_Ground.Floor <dbl> 0, …
## $ Address_line_1_Grove.Road <dbl> 0, …
## $ Address_line_1_Guildford.Lawn <dbl> 0, …
## $ Address_line_1_Guildhall <dbl> 0, …
## $ Address_line_1_Guildhall.Library <dbl> 0, …
## $ Address_line_1_Gun.Garden <dbl> 0, …
## $ Address_line_1_Gunby <dbl> 0, …
## $ Address_line_1_Gunners.Way <dbl> 0, …
## $ Address_line_1_Gwy.House <dbl> 0, …
## $ Address_line_1_Habergham.Dr <dbl> 0, …
## $ Address_line_1_Hackmans.Lane <dbl> 0, …
## $ Address_line_1_Halifax.Road <dbl> 0, …
## $ Address_line_1_Halifax.Way <dbl> 0, …
## $ Address_line_1_Hall.Lane <dbl> 0, …
## $ Address_line_1_Hallam.Road <dbl> 0, …
## $ Address_line_1_Hambledon <dbl> 0, …
## $ Address_line_1_Hamilton.Street <dbl> 0, …
## $ Address_line_1_Hampton.Cottage <dbl> 0, …
## $ Address_line_1_Harbour.Road <dbl> 0, …
## $ Address_line_1_Harbour.Square <dbl> 0, …
## $ Address_line_1_Harlow.Carr.Botanical.Gardens <dbl> 0, …
## $ Address_line_1_Harveys.Lane <dbl> 0, …
## $ Address_line_1_Hays.Way <dbl> 0, …
## $ Address_line_1_Headquarters <dbl> 0, …
## $ Address_line_1_Heatherslaw.Mill <dbl> 0, …
## $ Address_line_1_Heaton.Park <dbl> 0, …
## $ Address_line_1_Heron.House <dbl> 0, …
## $ Address_line_1_High.Street <dbl> 0, …
## $ Address_line_1_High.Town <dbl> 0, …
## $ Address_line_1_Higher.Stennack <dbl> 0, …
## $ Address_line_1_Hill.Street <dbl> 0, …
## $ Address_line_1_Hill.Top <dbl> 0, …
## $ Address_line_1_Holcombe.Road <dbl> 0, …
## $ Address_line_1_Holt.Road <dbl> 0, …
## $ Address_line_1_Hughenden.Manor <dbl> 0, …
## $ Address_line_1_Hull.Road <dbl> 0, …
## $ Address_line_1_Hunsbury.Hill.Road <dbl> 0, …
## $ Address_line_1_Huntercombe <dbl> 0, …
## $ Address_line_1_Hyde.Drive <dbl> 0, …
## $ Address_line_1_Imperial.War.Museum <dbl> 0, …
## $ Address_line_1_Isle.Martin <dbl> 0, …
## $ Address_line_1_Ivychurch.Road <dbl> 0, …
## $ Address_line_1_John.Paul.Jones.Cottage <dbl> 0, …
## $ Address_line_1_Kelham.Island.Museum <dbl> 0, …
## $ Address_line_1_Kensington.Gardens <dbl> 0, …
## $ Address_line_1_Kent.Street <dbl> 0, …
## $ Address_line_1_Keogh.Barracks <dbl> 0, …
## $ Address_line_1_Kew.Gardens <dbl> 0, …
## $ Address_line_1_King.Street <dbl> 0, …
## $ Address_line_1_Kings.Yard <dbl> 0, …
## $ Address_line_1_Kingsway <dbl> 0, …
## $ Address_line_1_Kiplin <dbl> 0, …
## $ Address_line_1_Kirkleatham <dbl> 0, …
## $ Address_line_1_Kirkwynd <dbl> 0, …
## $ Address_line_1_Kneller.Hall <dbl> 0, …
## $ Address_line_1_Laidhay <dbl> 0, …
## $ Address_line_1_Laird.Forge.Buildings <dbl> 0, …
## $ Address_line_1_Lamberhurst <dbl> 0, …
## $ Address_line_1_Lambeth.Palace.Road <dbl> 0, …
## $ Address_line_1_Lancaster.University <dbl> 0, …
## $ Address_line_1_Lanehead <dbl> 0, …
## $ Address_line_1_Lavender.Pond.and.Nature.Park <dbl> 0, …
## $ Address_line_1_Lawn.Road <dbl> 0, …
## $ Address_line_1_Left.Bank <dbl> 0, …
## $ Address_line_1_Leigh.Town.Hall <dbl> 0, …
## $ Address_line_1_Lighthouse.Road <dbl> 0, …
## $ Address_line_1_Lineside <dbl> 0, …
## $ Address_line_1_Linlithgow.Canal.Centre <dbl> 0, …
## $ Address_line_1_Liskeard.Road <dbl> 0, …
## $ Address_line_1_Little.Heath <dbl> 0, …
## $ Address_line_1_Livingston.Mill <dbl> 0, …
## $ Address_line_1_Lizard.Point <dbl> 0, …
## $ Address_line_1_Llandwrog <dbl> 0, …
## $ Address_line_1_Llanfrecha.Way <dbl> 0, …
## $ Address_line_1_Llanthony.Warehouse <dbl> 0, …
## $ Address_line_1_Lock.Lane <dbl> 0, …
## $ Address_line_1_Loco.Works.Road <dbl> 0, …
## $ Address_line_1_Lodge.Hill.Lane <dbl> 0, …
## $ Address_line_1_Lord.Street <dbl> 0, …
## $ Address_line_1_Lough.Neath.Discovery.Centre <dbl> 0, …
## $ Address_line_1_Low.Lighthouse <dbl> 0, …
## $ Address_line_1_Lower.High.Street <dbl> 0, …
## $ Address_line_1_Lower.Hillhead <dbl> 0, …
## $ Address_line_1_Lower.Road <dbl> 0, …
## $ Address_line_1_Lyness <dbl> 0, …
## $ Address_line_1_Main.Road <dbl> 0, …
## $ Address_line_1_Main.Street <dbl> 0, …
## $ Address_line_1_Marischal.College <dbl> 0, …
## $ Address_line_1_Market.Place <dbl> 0, …
## $ Address_line_1_Market.Square <dbl> 0, …
## $ Address_line_1_Market.Street <dbl> 0, …
## $ Address_line_1_Markfield.Road <dbl> 0, …
## $ Address_line_1_Marlborough.Road <dbl> 0, …
## $ Address_line_1_Marshall.Place <dbl> 0, …
## $ Address_line_1_Martello.Tower <dbl> 0, …
## $ Address_line_1_Masonic.Hall <dbl> 0, …
## $ Address_line_1_McConnell.Drive <dbl> 0, …
## $ Address_line_1_Mercer.Park <dbl> 0, …
## $ Address_line_1_Micklegate <dbl> 0, …
## $ Address_line_1_Middleton.Hall <dbl> 0, …
## $ Address_line_1_Mileham.Road <dbl> 0, …
## $ Address_line_1_Mill.Lane <dbl> 0, …
## $ Address_line_1_Mill.Street <dbl> 0, …
## $ Address_line_1_Milton <dbl> 0, …
## $ Address_line_1_Minster.Road <dbl> 0, …
## $ Address_line_1_Moat.Park <dbl> 0, …
## $ Address_line_1_MoDA.Collections.Centre <dbl> 0, …
## $ Address_line_1_Montacute <dbl> 0, …
## $ Address_line_1_Montague.House <dbl> 0, …
## $ Address_line_1_Monument.Cl <dbl> 0, …
## $ Address_line_1_Moorside.Road <dbl> 0, …
## $ Address_line_1_Morgans.Lane <dbl> 0, …
## $ Address_line_1_Moss.Street <dbl> 0, …
## $ Address_line_1_Mottisfont <dbl> 0, …
## $ Address_line_1_Moyard.House <dbl> 0, …
## $ Address_line_1_Muggeridge.Farm <dbl> 0, …
## $ Address_line_1_Muller.House <dbl> 0, …
## $ Address_line_1_Museum.and.Arts.Centre <dbl> 0, …
## $ Address_line_1_Museum.and.Study.Collection <dbl> 0, …
## $ Address_line_1_Museum.of.South.Somerset <dbl> 0, …
## $ Address_line_1_Museum.Square <dbl> 0, …
## $ Address_line_1_Museum.Terrace <dbl> 0, …
## $ Address_line_1_Ninewells.Hospital.and.Medical.School <dbl> 0, …
## $ Address_line_1_No..2.Office.Block <dbl> 0, …
## $ Address_line_1_North.Frodingham.Cemetery <dbl> 0, …
## $ Address_line_1_North.Norfolk.Railway <dbl> 0, …
## $ Address_line_1_North.Road <dbl> 0, …
## $ Address_line_1_North.Street <dbl> 0, …
## $ Address_line_1_Norwich.Heritage...Regeneration.Trust <dbl> 0, …
## $ Address_line_1_Oaklands.Park <dbl> 0, …
## $ Address_line_1_Oakley.Wharf.No.1 <dbl> 0, …
## $ Address_line_1_Old.Blue.Mill <dbl> 0, …
## $ Address_line_1_Old.Congregational.Chapel <dbl> 0, …
## $ Address_line_1_Old.GVT.Engine.Shed <dbl> 0, …
## $ Address_line_1_Old.Mills.Road <dbl> 0, …
## $ Address_line_1_Old.Parish.Church <dbl> 0, …
## $ Address_line_1_Old.School <dbl> 0, …
## $ Address_line_1_Old.Station.Yard <dbl> 0, …
## $ Address_line_1_Old.Town <dbl> 0, …
## $ Address_line_1_Old.Tunbridge.Electricity.Generating.Station <dbl> 0, …
## $ Address_line_1_Oundle.Road <dbl> 0, …
## $ Address_line_1_Palace.Avenue.Arcade <dbl> 0, …
## $ Address_line_1_Palacerigg.Country.Park <dbl> 0, …
## $ Address_line_1_Parade.Street <dbl> 0, …
## $ Address_line_1_Park.Walk <dbl> 0, …
## $ Address_line_1_Parkinson.Building <dbl> 0, …
## $ Address_line_1_Paternoster.Row <dbl> 0, …
## $ Address_line_1_Paynes.Park <dbl> 0, …
## $ Address_line_1_Peel.Park <dbl> 0, …
## $ Address_line_1_Perth.Road <dbl> 0, …
## $ Address_line_1_Philipshill.Road <dbl> 0, …
## $ Address_line_1_Pickering.Road <dbl> 0, …
## $ Address_line_1_Pickering.Station <dbl> 0, …
## $ Address_line_1_Pier.Road <dbl> 0, …
## $ Address_line_1_Plas.Newydd <dbl> 0, …
## $ Address_line_1_Polesden.Lacey <dbl> 0, …
## $ Address_line_1_Pool <dbl> 0, …
## $ Address_line_1_Port.Sunlight.Village <dbl> 0, …
## $ Address_line_1_Prescott.Hill <dbl> 0, …
## $ Address_line_1_Preston.Park <dbl> 0, …
## $ Address_line_1_Prestongrange <dbl> 0, …
## $ Address_line_1_Priory.Lodge <dbl> 0, …
## $ Address_line_1_Priory.Row <dbl> 0, …
## $ Address_line_1_Purfleet.Quay <dbl> 0, …
## $ Address_line_1_Queen.Street <dbl> 0, …
## $ Address_line_1_Rail.Yard <dbl> 0, …
## $ Address_line_1_Ravenor.Farm <dbl> 0, …
## $ Address_line_1_Ravenshead <dbl> 0, …
## $ Address_line_1_Ravenspoint <dbl> 0, …
## $ Address_line_1_Rayrigg.Road <dbl> 0, …
## $ Address_line_1_Rectory.Place <dbl> 0, …
## $ Address_line_1_Regional.Resource.Centre <dbl> 0, …
## $ Address_line_1_Repton.School <dbl> 0, …
## $ Address_line_1_Riccarton <dbl> 0, …
## $ Address_line_1_Rishworth.Road <dbl> 0, …
## $ Address_line_1_River.Court <dbl> 0, …
## $ Address_line_1_Riverside <dbl> 0, …
## $ Address_line_1_Riverside.Rd <dbl> 0, …
## $ Address_line_1_Riversley.Park <dbl> 0, …
## $ Address_line_1_Robinsons.School <dbl> 0, …
## $ Address_line_1_Roderick.Bowen.Library <dbl> 0, …
## $ Address_line_1_Rowleys.House.Museum <dbl> 0, …
## $ Address_line_1_Royal.Academy.of.Music <dbl> 0, …
## $ Address_line_1_Royal.Hospital.Road <dbl> 0, …
## $ Address_line_1_Royal.Marines.Museum <dbl> 0, …
## $ Address_line_1_Royal.Oak.Farm <dbl> 0, …
## $ Address_line_1_Royal.Victoria.Arcade <dbl> 0, …
## $ Address_line_1_Salisbury.Hall <dbl> 0, …
## $ Address_line_1_Sally.Lunns.House <dbl> 0, …
## $ Address_line_1_Sandown.Airport <dbl> 0, …
## $ Address_line_1_Sandown.Barrack.Battery <dbl> 0, …
## $ Address_line_1_Scalasaig.Harbour <dbl> 0, …
## $ Address_line_1_School.House <dbl> 0, …
## $ Address_line_1_School.Road <dbl> 0, …
## $ Address_line_1_Schoolhouse <dbl> 0, …
## $ Address_line_1_Sevenoaks.Library <dbl> 0, …
## $ Address_line_1_Shapwick.Road <dbl> 0, …
## $ Address_line_1_Shenstone.Drive <dbl> 0, …
## $ Address_line_1_Sheringham.Station <dbl> 0, …
## $ Address_line_1_Shibden.Hall <dbl> 0, …
## $ Address_line_1_Shugborough.Estate <dbl> 0, …
## $ Address_line_1_Signal.Tower <dbl> 0, …
## $ Address_line_1_Singleton <dbl> 0, …
## $ Address_line_1_Sir.John.Barrow.Cottage <dbl> 0, …
## $ Address_line_1_Sir.Kenneth.Green.Library <dbl> 0, …
## $ Address_line_1_Slatequarry.Road <dbl> 0, …
## $ Address_line_1_Sloe.Lane <dbl> 0, …
## $ Address_line_1_Smallhythe <dbl> 0, …
## $ Address_line_1_Soho.Cottages <dbl> 0, …
## $ Address_line_1_Somerset.House <dbl> 0, …
## $ Address_line_1_South.Shore <dbl> 0, …
## $ Address_line_1_Southey.Works <dbl> 0, …
## $ Address_line_1_Sparrows.Nest.Gardens <dbl> 0, …
## $ Address_line_1_Spicehall.Park.Road <dbl> 0, …
## $ Address_line_1_Springtown.Road <dbl> 0, …
## $ Address_line_1_St.Ayles <dbl> 0, …
## $ Address_line_1_St.Fagans <dbl> 0, …
## $ Address_line_1_St.Marys.Gate <dbl> 0, …
## $ Address_line_1_St.Mathews.Street <dbl> 0, …
## $ Address_line_1_St.Nicholas.Circle <dbl> 0, …
## $ Address_line_1_St.Peters.Street <dbl> 0, …
## $ Address_line_1_St.Wilfreds.Chapel <dbl> 0, …
## $ Address_line_1_St..Keyne.Station <dbl> 0, …
## $ Address_line_1_St..Marys.Hospital <dbl> 0, …
## $ Address_line_1_Stanley.Street <dbl> 0, …
## $ Address_line_1_Station.Approach <dbl> 0, …
## $ Address_line_1_Station.Road <dbl> 0, …
## $ Address_line_1_Station.Yard <dbl> 0, …
## $ Address_line_1_Steeple.Lane <dbl> 0, …
## $ Address_line_1_Stock.Woods <dbl> 0, …
## $ Address_line_1_Street.House <dbl> 0, …
## $ Address_line_1_Summer.Lane <dbl> 0, …
## $ Address_line_1_Surrey.Fire.and.Rescue.Service.Headquarters <dbl> 0, …
## $ Address_line_1_Surrey.Street <dbl> 0, …
## $ Address_line_1_TA.Centre <dbl> 0, …
## $ Address_line_1_Tameside.Central.Library...Art.Gallery <dbl> 0, …
## $ Address_line_1_Tannaghmore.Gardens <dbl> 0, …
## $ Address_line_1_Tarbatness.Road <dbl> 0, …
## $ Address_line_1_Tatton.Park <dbl> 0, …
## $ Address_line_1_Temple.Newsam.Estate <dbl> 0, …
## $ Address_line_1_Territorial.Army.Centre <dbl> 0, …
## $ Address_line_1_Thames.Valley.Police.Training.Centre <dbl> 0, …
## $ Address_line_1_Thameside.Complex <dbl> 0, …
## $ Address_line_1_The.Agricultural.Museum <dbl> 0, …
## $ Address_line_1_The.Arched.House <dbl> 0, …
## $ Address_line_1_The.Ark <dbl> 0, …
## $ Address_line_1_The.Barge.MV.Confiance <dbl> 0, …
## $ Address_line_1_The.Broadway <dbl> 0, …
## $ Address_line_1_The.Butts <dbl> 0, …
## $ Address_line_1_The.Castle <dbl> 0, …
## $ Address_line_1_The.Castleton.Centre <dbl> 0, …
## $ Address_line_1_The.Cathedral <dbl> 0, …
## $ Address_line_1_The.Clease <dbl> 0, …
## $ Address_line_1_The.Coliseum <dbl> 0, …
## $ Address_line_1_The.Discovery.Centre..Aberdeenshire.Museums.Service <dbl> 0, …
## $ Address_line_1_The.Former.Guardroom..Building.40 <dbl> 0, …
## $ Address_line_1_The.Friary <dbl> 0, …
## $ Address_line_1_The.Grain.Store <dbl> 0, …
## $ Address_line_1_The.Harbour <dbl> 0, …
## $ Address_line_1_The.Headrow <dbl> 0, …
## $ Address_line_1_The.Heritage.Centre <dbl> 0, …
## $ Address_line_1_The.Institute <dbl> 0, …
## $ Address_line_1_The.Keep <dbl> 0, …
## $ Address_line_1_The.Langdon.Down.Centre <dbl> 0, …
## $ Address_line_1_The.Malt.Barn <dbl> 0, …
## $ Address_line_1_The.Mansion <dbl> 0, …
## $ Address_line_1_The.Market.House <dbl> 0, …
## $ Address_line_1_The.Mens.Institute <dbl> 0, …
## $ Address_line_1_The.Mint <dbl> 0, …
## $ Address_line_1_The.Newarke <dbl> 0, …
## $ Address_line_1_The.Octagon <dbl> 0, …
## $ Address_line_1_The.Old.Fire.Station <dbl> 0, …
## $ Address_line_1_The.Old.Foundry <dbl> 0, …
## $ Address_line_1_The.Old.Grammar.School <dbl> 0, …
## $ Address_line_1_The.Old.Guildhall <dbl> 0, …
## $ Address_line_1_The.Old.Library <dbl> 0, …
## $ Address_line_1_The.Old.Parsonage <dbl> 0, …
## $ Address_line_1_The.Old.Police.Station <dbl> 0, …
## $ Address_line_1_The.Old.Pottery <dbl> 0, …
## $ Address_line_1_The.Old.Railway.Station <dbl> 0, …
## $ Address_line_1_The.Old.School.House <dbl> 0, …
## $ Address_line_1_The.Old.Schoolmasters.House <dbl> 0, …
## $ Address_line_1_The.Old.Station <dbl> 0, …
## $ Address_line_1_The.Old.Workhouse <dbl> 0, …
## $ Address_line_1_The.Phoenix.Centre <dbl> 0, …
## $ Address_line_1_The.Prebendal.Manor.House <dbl> 0, …
## $ Address_line_1_The.Precinct <dbl> 0, …
## $ Address_line_1_The.Priory <dbl> 0, …
## $ Address_line_1_The.Quays <dbl> 0, …
## $ Address_line_1_The.Railway.Station <dbl> 0, …
## $ Address_line_1_The.Redoubt.Fortress <dbl> 0, …
## $ Address_line_1_The.Royal.Chantry <dbl> 0, …
## $ Address_line_1_The.Scottish.Infantry.Depot <dbl> 0, …
## $ Address_line_1_The.Shallows <dbl> 0, …
## $ Address_line_1_The.Square <dbl> 0, …
## $ Address_line_1_The.Stones <dbl> 0, …
## $ Address_line_1_The.Street <dbl> 0, …
## $ Address_line_1_The.Tolmen.Centre <dbl> 0, …
## $ Address_line_1_The.Town.Hall <dbl> 0, …
## $ Address_line_1_The.Town.House <dbl> 0, …
## $ Address_line_1_The.Tree <dbl> 0, …
## $ Address_line_1_The.Tribunal <dbl> 0, …
## $ Address_line_1_The.Undercroft <dbl> 0, …
## $ Address_line_1_The.University <dbl> 0, …
## $ Address_line_1_The.University.of.Birmingham <dbl> 0, …
## $ Address_line_1_The.University.of.Newcastle <dbl> 0, …
## $ Address_line_1_The.Valley <dbl> 0, …
## $ Address_line_1_The.Warren <dbl> 0, …
## $ Address_line_1_The.Wharf <dbl> 0, …
## $ Address_line_1_The.Yard <dbl> 0, …
## $ Address_line_1_Theatre.Yard <dbl> 0, …
## $ Address_line_1_Thoresby.Courtyard <dbl> 0, …
## $ Address_line_1_Throwleigh <dbl> 0, …
## $ Address_line_1_Tide.Mill.Way <dbl> 0, …
## $ Address_line_1_Tolgus.Mount <dbl> 0, …
## $ Address_line_1_Tolhouse.Street <dbl> 0, …
## $ Address_line_1_Tollcross.Park <dbl> 0, …
## $ Address_line_1_Town.Hall <dbl> 0, …
## $ Address_line_1_Town.Hall.Complex <dbl> 0, …
## $ Address_line_1_Town.House.West <dbl> 0, …
## $ Address_line_1_Town.Street <dbl> 0, …
## $ Address_line_1_Tredegar.House <dbl> 0, …
## $ Address_line_1_Tresco.Estate <dbl> 0, …
## $ Address_line_1_Trinity.Road <dbl> 0, …
## $ Address_line_1_Trinity.Street <dbl> 0, …
## $ Address_line_1_Trumpington.Street <dbl> 0, …
## $ Address_line_1_Tudor.Barn <dbl> 0, …
## $ Address_line_1_Tudor.House <dbl> 0, …
## $ Address_line_1_Tudor.Road <dbl> 0, …
## $ Address_line_1_Twelvetrees.Crescent <dbl> 0, …
## $ Address_line_1_Unit.5 <dbl> 0, …
## $ Address_line_1_Units.1 <dbl> 0, …
## $ Address_line_1_University.Archives.and.Special.Collections.Centre <dbl> 0, …
## $ Address_line_1_University.College.London <dbl> 0, …
## $ Address_line_1_University.of.East.Anglia <dbl> 0, …
## $ Address_line_1_University.of.Manchester <dbl> 0, …
## $ Address_line_1_University.of.Warwick <dbl> 0, …
## $ Address_line_1_Upper.Hall <dbl> 0, …
## $ Address_line_1_Urbis.Building <dbl> 0, …
## $ Address_line_1_Valley.Road <dbl> 0, …
## $ Address_line_1_Verdant.Works <dbl> 0, …
## $ Address_line_1_Vestry.Road <dbl> 0, …
## $ Address_line_1_Vicars.Lane <dbl> 0, …
## $ Address_line_1_Victoria.Road <dbl> 0, …
## $ Address_line_1_Victoria.Station <dbl> 0, …
## $ Address_line_1_Village.Hall <dbl> 0, …
## $ Address_line_1_War.Memorial.Building <dbl> 0, …
## $ Address_line_1_War.Memorial.Gardens <dbl> 0, …
## $ Address_line_1_Wardown.Park <dbl> 0, …
## $ Address_line_1_Warhammer.World <dbl> 0, …
## $ Address_line_1_Warwickshire.County.Cricket.Club <dbl> 0, …
## $ Address_line_1_Welbeck.Estate <dbl> 0, …
## $ Address_line_1_Wellington.Mill <dbl> 0, …
## $ Address_line_1_West.High.Down <dbl> 0, …
## $ Address_line_1_West.Hoathly <dbl> 0, …
## $ Address_line_1_West.Strand <dbl> 0, …
## $ Address_line_1_Westminster.Synagogue <dbl> 0, …
## $ Address_line_1_Whisby.Road <dbl> 0, …
## $ Address_line_1_Whitburn.Library <dbl> 0, …
## $ Address_line_1_White.Lodge <dbl> 0, …
## $ Address_line_1_Whitefriars <dbl> 0, …
## $ Address_line_1_Wightwick.Bank <dbl> 0, …
## $ Address_line_1_Willenhall.Library <dbl> 0, …
## $ Address_line_1_Willesden.Green.Library.Centre <dbl> 0, …
## $ Address_line_1_Willowburn.Trading.Estate <dbl> 0, …
## $ Address_line_1_Willows.and.Wetlands.Visitor.Centre <dbl> 0, …
## $ Address_line_1_Wills.Memorial.Building <dbl> 0, …
## $ Address_line_1_Winchester.Road <dbl> 0, …
## $ Address_line_1_Winthorpe.Airfield <dbl> 0, …
## $ Address_line_1_Woodhorn.Village <dbl> 0, …
## $ Address_line_1_Wookey.Hole <dbl> 0, …
## $ Address_line_1_Worcestershire.Royal.Hospital <dbl> 0, …
## $ Address_line_1_Worsbrough.Bridge <dbl> 0, …
## $ Address_line_1_Yanmouth <dbl> 0, …
## $ Address_line_1_York.Avenue <dbl> 0, …
## $ Address_line_1_Ysgubor.Lleucu <dbl> 0, …
## $ Address_line_1_new <dbl> 0, …
## $ Governance_Government.Local_Authority <dbl> 0, …
## $ Governance_Government.National <dbl> 0, …
## $ Governance_Government.Other <dbl> 0, …
## $ Governance_Independent.English_Heritage <dbl> 0, …
## $ Governance_Independent.Historic_Environment_Scotland <dbl> 0, …
## $ Governance_Independent.National_Trust <dbl> 0, …
## $ Governance_Independent.National_Trust_for_Scotland <dbl> 0, …
## $ Governance_Independent.Not_for_profit <dbl> 1, …
## $ Governance_Independent.Private <dbl> 0, …
## $ Governance_Independent.Unknown <dbl> 0, …
## $ Governance_University <dbl> 0, …
## $ Governance_Unknown <dbl> 0, …
## $ Governance_new <dbl> 0, …
## $ Size_large <dbl> 0, …
## $ Size_medium <dbl> 0, …
## $ Size_small <dbl> 1, …
## $ Size_unknown <dbl> 0, …
## $ Size_new <dbl> 0, …
## $ Subject_Matter_Archaeology.Greek_and_Egyptian <dbl> 0, …
## $ Subject_Matter_Archaeology.Medieval <dbl> 0, …
## $ Subject_Matter_Archaeology.Mixed <dbl> 0, …
## $ Subject_Matter_Archaeology.Other <dbl> 0, …
## $ Subject_Matter_Archaeology.Prehistory <dbl> 0, …
## $ Subject_Matter_Archaeology.Roman <dbl> 0, …
## $ Subject_Matter_Arts.Ceramics <dbl> 0, …
## $ Subject_Matter_Arts.Costume_and_textiles <dbl> 0, …
## $ Subject_Matter_Arts.Crafts <dbl> 0, …
## $ Subject_Matter_Arts.Design <dbl> 0, …
## $ Subject_Matter_Arts.Fine_and_decorative_arts <dbl> 0, …
## $ Subject_Matter_Arts.Glass <dbl> 0, …
## $ Subject_Matter_Arts.Literature <dbl> 0, …
## $ Subject_Matter_Arts.Music <dbl> 0, …
## $ Subject_Matter_Arts.Other <dbl> 0, …
## $ Subject_Matter_Arts.Photography <dbl> 0, …
## $ Subject_Matter_Belief_and_identity <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Church_treasuries <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Ethnic_group <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Freemasons <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Other <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Religion <dbl> 0, …
## $ Subject_Matter_Belief_and_identity.Religious_buildings <dbl> 0, …
## $ Subject_Matter_Buildings.Civic <dbl> 0, …
## $ Subject_Matter_Buildings.Houses.Large_houses <dbl> 0, …
## $ Subject_Matter_Buildings.Houses.Medium_houses <dbl> 0, …
## $ Subject_Matter_Buildings.Houses.Small_houses <dbl> 0, …
## $ Subject_Matter_Buildings.Other <dbl> 0, …
## $ Subject_Matter_Buildings.Palace <dbl> 0, …
## $ Subject_Matter_Buildings.Penal <dbl> 0, …
## $ Subject_Matter_Buildings.School <dbl> 0, …
## $ Subject_Matter_Buildings.Shops <dbl> 0, …
## $ Subject_Matter_Communications <dbl> 0, …
## $ Subject_Matter_Communications.Other <dbl> 0, …
## $ Subject_Matter_Communications.Post <dbl> 0, …
## $ Subject_Matter_Communications.Radio <dbl> 0, …
## $ Subject_Matter_Food_and_drink <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Clocks_and_watches <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Industrial_life <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Metals <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Mining_and_quarrying <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Mixed <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Other <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Potteries <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Print <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Steam_and_engines <dbl> 0, …
## $ Subject_Matter_Industry_and_manufacture.Textiles <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Cricket <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Fairgrounds_and_amusements <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Film_Cinema_and_TV <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Other <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Rugby_and_football <dbl> 0, …
## $ Subject_Matter_Leisure_and_sport.Toys_and_models <dbl> 0, …
## $ Subject_Matter_Local_Histories <dbl> 0, …
## $ Subject_Matter_Medicine_and_health.Hospital <dbl> 0, …
## $ Subject_Matter_Medicine_and_health.Other <dbl> 0, …
## $ Subject_Matter_Medicine_and_health.Professional_association <dbl> 0, …
## $ Subject_Matter_Mixed.Bygones <dbl> 0, …
## $ Subject_Matter_Mixed.Encyclopaedic <dbl> 0, …
## $ Subject_Matter_Mixed.Other <dbl> 0, …
## $ Subject_Matter_Natural_world <dbl> 0, …
## $ Subject_Matter_Natural_world.Dinosaurs <dbl> 1, …
## $ Subject_Matter_Natural_world.Fossils <dbl> 0, …
## $ Subject_Matter_Natural_world.Geology <dbl> 0, …
## $ Subject_Matter_Natural_world.Herbaria_and_gardening <dbl> 0, …
## $ Subject_Matter_Natural_world.Mixed <dbl> 0, …
## $ Subject_Matter_Natural_world.Other <dbl> 0, …
## $ Subject_Matter_Natural_world.Zoology <dbl> 0, …
## $ Subject_Matter_Other <dbl> 0, …
## $ Subject_Matter_Personality.Art <dbl> 0, …
## $ Subject_Matter_Personality.Explorer <dbl> 0, …
## $ Subject_Matter_Personality.Literary <dbl> 0, …
## $ Subject_Matter_Personality.Music <dbl> 0, …
## $ Subject_Matter_Personality.Other <dbl> 0, …
## $ Subject_Matter_Personality.Political <dbl> 0, …
## $ Subject_Matter_Personality.Religious <dbl> 0, …
## $ Subject_Matter_Personality.Scientific <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Farming <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Forges <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Other <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Rural_life <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Textiles <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Watermills <dbl> 0, …
## $ Subject_Matter_Rural_Industry.Windmills <dbl> 0, …
## $ Subject_Matter_Science_and_technology.Computing_and_gaming <dbl> 0, …
## $ Subject_Matter_Science_and_technology.Other <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Boats_and_ships <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Fishing <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Lighthouses <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Mixed <dbl> 0, …
## $ Subject_Matter_Sea_and_seafaring.Other <dbl> 0, …
## $ Subject_Matter_Services.Fire <dbl> 0, …
## $ Subject_Matter_Services.Other <dbl> 0, …
## $ Subject_Matter_Services.Police <dbl> 0, …
## $ Subject_Matter_Services.RNLI <dbl> 0, …
## $ Subject_Matter_Transport.Aviation <dbl> 0, …
## $ Subject_Matter_Transport.Bicycles <dbl> 0, …
## $ Subject_Matter_Transport.Buses_and_trams <dbl> 0, …
## $ Subject_Matter_Transport.Canals <dbl> 0, …
## $ Subject_Matter_Transport.Cars_and_motorbikes <dbl> 0, …
## $ Subject_Matter_Transport.Mixed <dbl> 0, …
## $ Subject_Matter_Transport.Other <dbl> 0, …
## $ Subject_Matter_Transport.Trains_and_railways <dbl> 0, …
## $ Subject_Matter_Utilities.Gas_and_electricity <dbl> 0, …
## $ Subject_Matter_Utilities.Water_and_waste <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Airforce <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Bunker <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Castles_and_forts <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Event_or_site <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Military <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Navy <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Other <dbl> 0, …
## $ Subject_Matter_War_and_conflict.Regiment <dbl> 0, …
## $ Subject_Matter_new <dbl> 0, …
## $ Year_opened_X1759 <dbl> 0, …
## $ Year_opened_X1761 <dbl> 0, …
## $ Year_opened_X1816 <dbl> 0, …
## $ Year_opened_X1823 <dbl> 0, …
## $ Year_opened_X1829 <dbl> 0, …
## $ Year_opened_X1835 <dbl> 0, …
## $ Year_opened_X1837 <dbl> 0, …
## $ Year_opened_X1845 <dbl> 0, …
## $ Year_opened_X1846 <dbl> 0, …
## $ Year_opened_X1847 <dbl> 0, …
## $ Year_opened_X1850 <dbl> 0, …
## $ Year_opened_X1852 <dbl> 0, …
## $ Year_opened_X1857 <dbl> 0, …
## $ Year_opened_X1859 <dbl> 0, …
## $ Year_opened_X1860 <dbl> 0, …
## $ Year_opened_X1864 <dbl> 0, …
## $ Year_opened_X1866 <dbl> 0, …
## $ Year_opened_X1867 <dbl> 0, …
## $ Year_opened_X1868 <dbl> 0, …
## $ Year_opened_X1869 <dbl> 0, …
## $ Year_opened_X1876 <dbl> 0, …
## $ Year_opened_X1878 <dbl> 0, …
## $ Year_opened_X1880 <dbl> 0, …
## $ Year_opened_X1881 <dbl> 0, …
## $ Year_opened_X1883 <dbl> 0, …
## $ Year_opened_X1884 <dbl> 0, …
## $ Year_opened_X1885 <dbl> 0, …
## $ Year_opened_X1886 <dbl> 0, …
## $ Year_opened_X1888 <dbl> 0, …
## $ Year_opened_X1889 <dbl> 0, …
## $ Year_opened_X1890 <dbl> 0, …
## $ Year_opened_X1892 <dbl> 0, …
## $ Year_opened_X1893 <dbl> 0, …
## $ Year_opened_X1895 <dbl> 0, …
## $ Year_opened_X1896 <dbl> 0, …
## $ Year_opened_X1898 <dbl> 0, …
## $ Year_opened_X1899 <dbl> 0, …
## $ Year_opened_X1900 <dbl> 0, …
## $ Year_opened_X1901 <dbl> 0, …
## $ Year_opened_X1902 <dbl> 0, …
## $ Year_opened_X1905 <dbl> 0, …
## $ Year_opened_X1906 <dbl> 0, …
## $ Year_opened_X1907 <dbl> 0, …
## $ Year_opened_X1908 <dbl> 0, …
## $ Year_opened_X1911 <dbl> 0, …
## $ Year_opened_X1912 <dbl> 0, …
## $ Year_opened_X1913 <dbl> 0, …
## $ Year_opened_X1914 <dbl> 0, …
## $ Year_opened_X1915 <dbl> 0, …
## $ Year_opened_X1916 <dbl> 0, …
## $ Year_opened_X1917 <dbl> 0, …
## $ Year_opened_X1919 <dbl> 0, …
## $ Year_opened_X1920 <dbl> 0, …
## $ Year_opened_X1922 <dbl> 0, …
## $ Year_opened_X1923 <dbl> 0, …
## $ Year_opened_X1924 <dbl> 0, …
## $ Year_opened_X1925 <dbl> 0, …
## $ Year_opened_X1926 <dbl> 0, …
## $ Year_opened_X1927 <dbl> 0, …
## $ Year_opened_X1928 <dbl> 0, …
## $ Year_opened_X1929 <dbl> 0, …
## $ Year_opened_X1930 <dbl> 0, …
## $ Year_opened_X1931 <dbl> 0, …
## $ Year_opened_X1932 <dbl> 0, …
## $ Year_opened_X1933 <dbl> 0, …
## $ Year_opened_X1934 <dbl> 0, …
## $ Year_opened_X1935 <dbl> 0, …
## $ Year_opened_X1936 <dbl> 0, …
## $ Year_opened_X1937 <dbl> 0, …
## $ Year_opened_X1938 <dbl> 0, …
## $ Year_opened_X1941 <dbl> 0, …
## $ Year_opened_X1942 <dbl> 0, …
## $ Year_opened_X1944 <dbl> 0, …
## $ Year_opened_X1945 <dbl> 0, …
## $ Year_opened_X1946 <dbl> 0, …
## $ Year_opened_X1947 <dbl> 0, …
## $ Year_opened_X1948 <dbl> 0, …
## $ Year_opened_X1949 <dbl> 0, …
## $ Year_opened_X1950 <dbl> 0, …
## $ Year_opened_X1951 <dbl> 0, …
## $ Year_opened_X1952 <dbl> 0, …
## $ Year_opened_X1953 <dbl> 0, …
## $ Year_opened_X1954 <dbl> 0, …
## $ Year_opened_X1955 <dbl> 0, …
## $ Year_opened_X1956 <dbl> 0, …
## $ Year_opened_X1957 <dbl> 0, …
## $ Year_opened_X1958 <dbl> 0, …
## $ Year_opened_X1959 <dbl> 0, …
## $ Year_opened_X1960 <dbl> 0, …
## $ Year_opened_X1961 <dbl> 0, …
## $ Year_opened_X1962 <dbl> 0, …
## $ Year_opened_X1963 <dbl> 0, …
## $ Year_opened_X1964 <dbl> 0, …
## $ Year_opened_X1965 <dbl> 0, …
## $ Year_opened_X1966 <dbl> 0, …
## $ Year_opened_X1967 <dbl> 0, …
## $ Year_opened_X1968 <dbl> 0, …
## $ Year_opened_X1969 <dbl> 0, …
## $ Year_opened_X1970 <dbl> 0, …
## $ Year_opened_X1971 <dbl> 0, …
## $ Year_opened_X1972 <dbl> 0, …
## $ Year_opened_X1973 <dbl> 0, …
## $ Year_opened_X1974 <dbl> 0, …
## $ Year_opened_X1975 <dbl> 0, …
## $ Year_opened_X1976 <dbl> 0, …
## $ Year_opened_X1977 <dbl> 0, …
## $ Year_opened_X1978 <dbl> 0, …
## $ Year_opened_X1979 <dbl> 0, …
## $ Year_opened_X1980 <dbl> 0, …
## $ Year_opened_X1981 <dbl> 0, …
## $ Year_opened_X1982 <dbl> 0, …
## $ Year_opened_X1983 <dbl> 0, …
## $ Year_opened_X1984 <dbl> 0, …
## $ Year_opened_X1985 <dbl> 0, …
## $ Year_opened_X1986 <dbl> 0, …
## $ Year_opened_X1987 <dbl> 0, …
## $ Year_opened_X1988 <dbl> 0, …
## $ Year_opened_X1989 <dbl> 0, …
## $ Year_opened_X1990 <dbl> 0, …
## $ Year_opened_X1991 <dbl> 0, …
## $ Year_opened_X1992 <dbl> 0, …
## $ Year_opened_X1993 <dbl> 0, …
## $ Year_opened_X1994 <dbl> 0, …
## $ Year_opened_X1995 <dbl> 0, …
## $ Year_opened_X1996 <dbl> 0, …
## $ Year_opened_X1997 <dbl> 0, …
## $ Year_opened_X1998 <dbl> 0, …
## $ Year_opened_X1999 <dbl> 0, …
## $ Year_opened_X2000 <dbl> 0, …
## $ Year_opened_X2001 <dbl> 0, …
## $ Year_opened_X2002 <dbl> 0, …
## $ Year_opened_X2003 <dbl> 0, …
## $ Year_opened_X2004 <dbl> 0, …
## $ Year_opened_X2005 <dbl> 0, …
## $ Year_opened_X2006 <dbl> 0, …
## $ Year_opened_X2007 <dbl> 0, …
## $ Year_opened_X2008 <dbl> 0, …
## $ Year_opened_X2009 <dbl> 0, …
## $ Year_opened_X2010 <dbl> 0, …
## $ Year_opened_X2011 <dbl> 0, …
## $ Year_opened_X2012 <dbl> 0, …
## $ Year_opened_X2013 <dbl> 1, …
## $ Year_opened_X2014 <dbl> 0, …
## $ Year_opened_X2015 <dbl> 0, …
## $ Year_opened_X2016 <dbl> 0, …
## $ Year_opened_X2017 <dbl> 0, …
## $ Year_opened_X2018 <dbl> 0, …
## $ Year_opened_X2019 <dbl> 0, …
## $ Year_opened_X2020 <dbl> 0, …
## $ Year_opened_X2021 <dbl> 0, …
## $ Year_opened_new <dbl> 0, …
## $ Area_Deprivation_index_X2 <dbl> 0, …
## $ Area_Deprivation_index_X3 <dbl> 0, …
## $ Area_Deprivation_index_X4 <dbl> 1, …
## $ Area_Deprivation_index_X5 <dbl> 0, …
## $ Area_Deprivation_index_X6 <dbl> 0, …
## $ Area_Deprivation_index_X7 <dbl> 0, …
## $ Area_Deprivation_index_X8 <dbl> 0, …
## $ Area_Deprivation_index_X9 <dbl> 0, …
## $ Area_Deprivation_index_X10 <dbl> 0, …
## $ Area_Deprivation_index_new <dbl> 0, …
## $ Area_Deprivation_index_crime_X2 <dbl> 0, …
## $ Area_Deprivation_index_crime_X3 <dbl> 0, …
## $ Area_Deprivation_index_crime_X4 <dbl> 0, …
## $ Area_Deprivation_index_crime_X5 <dbl> 0, …
## $ Area_Deprivation_index_crime_X6 <dbl> 0, …
## $ Area_Deprivation_index_crime_X7 <dbl> 0, …
## $ Area_Deprivation_index_crime_X8 <dbl> 0, …
## $ Area_Deprivation_index_crime_X9 <dbl> 1, …
## $ Area_Deprivation_index_crime_X10 <dbl> 0, …
## $ Area_Deprivation_index_crime_new <dbl> 0, …
## $ Area_Deprivation_index_education_X2 <dbl> 0, …
## $ Area_Deprivation_index_education_X3 <dbl> 0, …
## $ Area_Deprivation_index_education_X4 <dbl> 0, …
## $ Area_Deprivation_index_education_X5 <dbl> 0, …
## $ Area_Deprivation_index_education_X6 <dbl> 1, …
## $ Area_Deprivation_index_education_X7 <dbl> 0, …
## $ Area_Deprivation_index_education_X8 <dbl> 0, …
## $ Area_Deprivation_index_education_X9 <dbl> 0, …
## $ Area_Deprivation_index_education_X10 <dbl> 0, …
## $ Area_Deprivation_index_education_new <dbl> 0, …
## $ Area_Deprivation_index_employment_X2 <dbl> 0, …
## $ Area_Deprivation_index_employment_X3 <dbl> 0, …
## $ Area_Deprivation_index_employment_X4 <dbl> 0, …
## $ Area_Deprivation_index_employment_X5 <dbl> 0, …
## $ Area_Deprivation_index_employment_X6 <dbl> 1, …
## $ Area_Deprivation_index_employment_X7 <dbl> 0, …
## $ Area_Deprivation_index_employment_X8 <dbl> 0, …
## $ Area_Deprivation_index_employment_X9 <dbl> 0, …
## $ Area_Deprivation_index_employment_X10 <dbl> 0, …
## $ Area_Deprivation_index_employment_new <dbl> 0, …
## $ Area_Deprivation_index_health_X2 <dbl> 0, …
## $ Area_Deprivation_index_health_X3 <dbl> 0, …
## $ Area_Deprivation_index_health_X4 <dbl> 0, …
## $ Area_Deprivation_index_health_X5 <dbl> 0, …
## $ Area_Deprivation_index_health_X6 <dbl> 0, …
## $ Area_Deprivation_index_health_X7 <dbl> 0, …
## $ Area_Deprivation_index_health_X8 <dbl> 1, …
## $ Area_Deprivation_index_health_X9 <dbl> 0, …
## $ Area_Deprivation_index_health_X10 <dbl> 0, …
## $ Area_Deprivation_index_health_new <dbl> 0, …
## $ Area_Deprivation_index_housing_X2 <dbl> 0, …
## $ Area_Deprivation_index_housing_X3 <dbl> 0, …
## $ Area_Deprivation_index_housing_X4 <dbl> 0, …
## $ Area_Deprivation_index_housing_X5 <dbl> 0, …
## $ Area_Deprivation_index_housing_X6 <dbl> 0, …
## $ Area_Deprivation_index_housing_X7 <dbl> 0, …
## $ Area_Deprivation_index_housing_X8 <dbl> 0, …
## $ Area_Deprivation_index_housing_X9 <dbl> 0, …
## $ Area_Deprivation_index_housing_X10 <dbl> 0, …
## $ Area_Deprivation_index_housing_new <dbl> 0, …
## $ Area_Deprivation_index_income_X2 <dbl> 0, …
## $ Area_Deprivation_index_income_X3 <dbl> 0, …
## $ Area_Deprivation_index_income_X4 <dbl> 0, …
## $ Area_Deprivation_index_income_X5 <dbl> 0, …
## $ Area_Deprivation_index_income_X6 <dbl> 0, …
## $ Area_Deprivation_index_income_X7 <dbl> 1, …
## $ Area_Deprivation_index_income_X8 <dbl> 0, …
## $ Area_Deprivation_index_income_X9 <dbl> 0, …
## $ Area_Deprivation_index_income_X10 <dbl> 0, …
## $ Area_Deprivation_index_income_new <dbl> 0, …
## $ Area_Deprivation_index_services_X2 <dbl> 0, …
## $ Area_Deprivation_index_services_X3 <dbl> 0, …
## $ Area_Deprivation_index_services_X4 <dbl> 0, …
## $ Area_Deprivation_index_services_X5 <dbl> 0, …
## $ Area_Deprivation_index_services_X6 <dbl> 0, …
## $ Area_Deprivation_index_services_X7 <dbl> 0, …
## $ Area_Deprivation_index_services_X8 <dbl> 0, …
## $ Area_Deprivation_index_services_X9 <dbl> 0, …
## $ Area_Deprivation_index_services_X10 <dbl> 0, …
## $ Area_Deprivation_index_services_new <dbl> 0, …
## $ Area_Geodemographic_group_code_X1br <dbl> 0, …
## $ Area_Geodemographic_group_code_X2ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X2br <dbl> 0, …
## $ Area_Geodemographic_group_code_X3ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X3br <dbl> 1, …
## $ Area_Geodemographic_group_code_X3cr <dbl> 0, …
## $ Area_Geodemographic_group_code_X4ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X5ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X6ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X6br <dbl> 0, …
## $ Area_Geodemographic_group_code_X7ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X7br <dbl> 0, …
## $ Area_Geodemographic_group_code_X7cr <dbl> 0, …
## $ Area_Geodemographic_group_code_X8ar <dbl> 0, …
## $ Area_Geodemographic_group_code_X8br <dbl> 0, …
## $ Area_Geodemographic_group_code_new <dbl> 0, …
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
loss_reduction = tune(), sample_size = tune()) %>%
set_mode("classification") %>%
set_engine("xgboost")
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
doParallel::registerDoParallel()
set.seed(17375)
xgboost_tune <-
tune_grid(xgboost_workflow,
resamples = data_cv,
grid = 5,
control = control_grid(save_pred = TRUE))
## Warning: package 'xgboost' was built under R version 4.3.3
collect_metrics(xgboost_tune)
## # A tibble: 15 × 12
## trees min_n tree_depth learn_rate loss_reduction sample_size .metric
## <int> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 677 7 14 0.00195 5.18e- 4 0.477 accuracy
## 2 677 7 14 0.00195 5.18e- 4 0.477 brier_class
## 3 677 7 14 0.00195 5.18e- 4 0.477 roc_auc
## 4 1016 11 4 0.00394 3.13e-10 0.723 accuracy
## 5 1016 11 4 0.00394 3.13e-10 0.723 brier_class
## 6 1016 11 4 0.00394 3.13e-10 0.723 roc_auc
## 7 1626 19 8 0.0202 2.06e- 7 0.257 accuracy
## 8 1626 19 8 0.0202 2.06e- 7 0.257 brier_class
## 9 1626 19 8 0.0202 2.06e- 7 0.257 roc_auc
## 10 1483 30 5 0.0873 5.18e- 3 0.401 accuracy
## 11 1483 30 5 0.0873 5.18e- 3 0.401 brier_class
## 12 1483 30 5 0.0873 5.18e- 3 0.401 roc_auc
## 13 111 39 12 0.238 4.91e- 1 0.850 accuracy
## 14 111 39 12 0.238 4.91e- 1 0.850 brier_class
## 15 111 39 12 0.238 4.91e- 1 0.850 roc_auc
## # ℹ 5 more variables: .estimator <chr>, mean <dbl>, n <int>, std_err <dbl>,
## # .config <chr>
collect_predictions(xgboost_tune) %>%
group_by(id) %>%
roc_curve(Accreditation, .pred_Yes) %>%
autoplot()
xgboost_last <- xgboost_workflow %>%
finalize_workflow(select_best(xgboost_tune, metric = "accuracy")) %>%
last_fit(data_split)
## New names:
## New names:
## • `Address_line_1_Units.1...2` -> `Address_line_1_Units.1`
collect_metrics(xgboost_last)
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy binary 0.693 Preprocessor1_Model1
## 2 roc_auc binary 0.773 Preprocessor1_Model1
## 3 brier_class binary 0.193 Preprocessor1_Model1
collect_predictions((xgboost_last)) %>%
#mutate(.pred_Accredited = as.factor(.pred_Accredited)) %>%
yardstick::conf_mat(Accreditation, .pred_class) %>%
autoplot()
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
xgboost_last %>%
workflows::extract_fit_engine() %>%
vip()