Goal is to predict accreditation, i.e. which museums are likely to be accredited.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(correlationfunnel)
## ══ correlationfunnel Tip #2 ════════════════════════════════════════════════════
## Clean your NA's prior to using `binarize()`.
## Missing values and cleaning data are critical to getting great correlations. :)
# Download the TidyTuesday museums CSV (2022-11-22) and parse it into a tibble.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv"
)
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Summarise every column (type, missing counts, basic distribution stats)
# to spot data-quality issues before cleaning.
skimr::skim(data)
Name | data |
Number of rows | 4191 |
Number of columns | 35 |
_______________________ | |
Column type frequency: | |
character | 24 |
numeric | 11 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
museum_id | 0 | 1.00 | 8 | 15 | 0 | 4191 | 0 |
Name_of_museum | 0 | 1.00 | 3 | 76 | 0 | 4190 | 0 |
Address_line_1 | 441 | 0.89 | 3 | 61 | 0 | 3212 | 0 |
Address_line_2 | 2816 | 0.33 | 3 | 39 | 0 | 1167 | 0 |
Village,_Town_or_City | 4 | 1.00 | 3 | 24 | 0 | 1696 | 0 |
Postcode | 0 | 1.00 | 6 | 9 | 0 | 3918 | 0 |
Admin_area | 0 | 1.00 | 12 | 137 | 0 | 393 | 0 |
Accreditation | 0 | 1.00 | 10 | 12 | 0 | 2 | 0 |
Governance | 0 | 1.00 | 7 | 41 | 0 | 13 | 0 |
Size | 0 | 1.00 | 4 | 7 | 0 | 5 | 0 |
Size_provenance | 179 | 0.96 | 2 | 29 | 0 | 16 | 0 |
Subject_Matter | 0 | 1.00 | 5 | 45 | 0 | 114 | 0 |
Year_opened | 0 | 1.00 | 9 | 9 | 0 | 351 | 0 |
Year_closed | 0 | 1.00 | 9 | 9 | 0 | 170 | 0 |
DOMUS_Subject_Matter | 2788 | 0.33 | 5 | 27 | 0 | 21 | 0 |
Primary_provenance_of_data | 0 | 1.00 | 3 | 8 | 0 | 18 | 0 |
Identifier_used_in_primary_data_source | 2056 | 0.51 | 2 | 8 | 0 | 2134 | 0 |
Area_Geodemographic_group | 49 | 0.99 | 11 | 40 | 0 | 17 | 0 |
Area_Geodemographic_group_code | 49 | 0.99 | 3 | 3 | 0 | 16 | 0 |
Area_Geodemographic_subgroup | 49 | 0.99 | 12 | 39 | 0 | 25 | 0 |
Area_Geodemographic_subgroup_code | 49 | 0.99 | 4 | 4 | 0 | 24 | 0 |
Area_Geodemographic_supergroup | 49 | 0.99 | 16 | 39 | 0 | 8 | 0 |
Area_Geodemographic_supergroup_code | 49 | 0.99 | 2 | 2 | 0 | 8 | 0 |
Notes | 2980 | 0.29 | 12 | 751 | 0 | 956 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Latitude | 0 | 1.00 | 52.93 | 2.09 | 49.18 | 51.48 | 52.47 | 53.96 | 100.00 | ▇▁▁▁▁ |
Longitude | 0 | 1.00 | -1.96 | 1.84 | -8.09 | -3.10 | -1.87 | -0.48 | 1.76 | ▁▂▇▇▅ |
DOMUS_identifier | 2347 | 0.44 | 1303.45 | 1597.19 | 1.00 | 486.50 | 991.50 | 1470.25 | 7746.00 | ▇▂▁▁▁ |
Area_Deprivation_index | 49 | 0.99 | 5.44 | 2.48 | 1.00 | 4.00 | 5.00 | 7.00 | 10.00 | ▃▆▇▆▃ |
Area_Deprivation_index_crime | 49 | 0.99 | 5.43 | 3.07 | 1.00 | 3.00 | 6.00 | 8.00 | 10.00 | ▇▆▅▇▇ |
Area_Deprivation_index_education | 49 | 0.99 | 6.04 | 2.61 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▃▅▇▇▆ |
Area_Deprivation_index_employment | 49 | 0.99 | 6.08 | 2.76 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▅▆▇▇▇ |
Area_Deprivation_index_health | 49 | 0.99 | 6.02 | 2.82 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▅▆▆▇▇ |
Area_Deprivation_index_housing | 49 | 0.99 | 3.97 | 2.75 | 1.00 | 1.00 | 3.00 | 6.00 | 10.00 | ▇▅▃▂▂ |
Area_Deprivation_index_income | 49 | 0.99 | 5.99 | 2.62 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▃▆▇▇▆ |
Area_Deprivation_index_services | 49 | 0.99 | 4.78 | 3.01 | 1.00 | 2.00 | 4.00 | 7.00 | 10.00 | ▇▅▅▅▅ |
Issues with data

* Missing values
* Factors or numeric variables
    * Governance, Size, Subject_Matter, Area_Geodemographic_group, Area_Geodemographic_group_code, Area_Deprivation_index, Area_Deprivation_index_crime, Area_Deprivation_index_education, Area_Deprivation_index_employment, Area_Deprivation_index_health, Area_Deprivation_index_housing, Area_Deprivation_index_income, Area_Deprivation_index_services
* Zero variance variables
* Character variables: Convert them to numbers in recipe step
* Unbalanced target variable: Accreditation
* ID variable: museum_id
# Treat missing values and clean data:
# drop the listed columns, then remove every row that still contains an NA.
data_clean <- data %>%
  select(
    -c(
      Size_provenance,
      DOMUS_Subject_Matter,
      Year_closed,
      Primary_provenance_of_data,
      Identifier_used_in_primary_data_source,
      Area_Geodemographic_subgroup,
      Area_Geodemographic_subgroup_code,
      Area_Geodemographic_supergroup,
      Area_Geodemographic_supergroup_code,
      Notes,
      Latitude,
      Longitude,
      DOMUS_identifier
    )
  ) %>%
  na.omit()
# Names of the columns that should be treated as factors (categorical text
# columns plus the 1-10 deprivation index columns), in the order listed here.
factors_vec <- names(
  select(
    data,
    Governance,
    Size,
    Subject_Matter,
    Area_Geodemographic_group,
    Area_Geodemographic_group_code,
    Area_Deprivation_index,
    Area_Deprivation_index_crime,
    Area_Deprivation_index_education,
    Area_Deprivation_index_employment,
    Area_Deprivation_index_health,
    Area_Deprivation_index_housing,
    Area_Deprivation_index_income,
    Area_Deprivation_index_services
  )
)
# Convert the columns named in `factors_vec` to factors.
# Start from `data_clean`, not the raw `data`: piping from `data` here would
# overwrite `data_clean` and silently discard the column removal and
# `na.omit()` applied in the cleaning step above.
# NOTE(review): printed results further down were captured before this fix
# and may differ when the document is re-run.
data_clean <- data_clean %>%
  # Address factors imported as numeric
  mutate(across(all_of(factors_vec), as.factor))
Accredited vs. Unaccredited
# Class balance of the target: number of museums per accreditation status.
count(data_clean, Accreditation)
## # A tibble: 2 × 2
## Accreditation n
## <chr> <int>
## 1 Accredited 1720
## 2 Unaccredited 2471
# Bar chart of the number of museums in each accreditation class.
ggplot(data_clean, aes(x = Accreditation)) +
  geom_bar()
Most common Subjects
# Museums per subject matter, most frequent first.
data_clean %>%
  count(Subject_Matter, sort = TRUE)
## # A tibble: 114 × 2
## Subject_Matter n
## <fct> <int>
## 1 Local_Histories 887
## 2 Buildings-Houses-Large_houses 414
## 3 Arts-Fine_and_decorative_arts 195
## 4 Transport-Trains_and_railways 139
## 5 War_and_conflict-Regiment 132
## 6 Mixed-Encyclopaedic 119
## 7 Mixed-Other 85
## 8 Personality-Literary 83
## 9 Other 79
## 10 Buildings-Houses-Medium_houses 73
## # ℹ 104 more rows
# Bar chart of the ten most frequent subject matters.
# Plotting every subject (over a hundred distinct values) on one axis makes
# the labels overlap and the chart unreadable, so only the top ten are shown,
# drawn horizontally and ordered by count.
data_clean %>%
  count(Subject_Matter, sort = TRUE) %>%
  slice_head(n = 10) %>%
  ggplot(aes(x = n, y = fct_reorder(Subject_Matter, n))) +
  geom_col() +
  labs(x = "count", y = "Subject_Matter")
Years museums opened
# Count plot: point size shows how many museums share each combination of
# opening year and accreditation status.
ggplot(data_clean, aes(x = Year_opened, y = Accreditation)) +
  geom_count()
Correlation plot
# Remove any row that still has a missing value before binarizing.
data_clean <- data_clean %>%
  na.omit()
# Step 1: binarize
# Drop the ID column, then turn every remaining column into 0/1 indicator
# columns (bins for numeric data, one column per category otherwise).
data_binarized <- data_clean %>%
  select(!museum_id) %>%
  binarize()

# Inspect the resulting indicator columns.
glimpse(data_binarized)
## Rows: 237
## Columns: 338
## $ `Name_of_museum__13th/18th_Royal_Hussars_Museum` <dbl> …
## $ `Name_of_museum__-OTHER` <dbl> …
## $ Address_line_1__Discovery_Museum <dbl> …
## $ Address_line_1__Peninsula_Barracks <dbl> …
## $ Address_line_1__The_Redoubt_Fortress <dbl> …
## $ Address_line_1__Town_Hall <dbl> …
## $ `Address_line_1__-OTHER` <dbl> …
## $ Address_line_2__Blandford_Square <dbl> …
## $ Address_line_2__High_Street <dbl> …
## $ Address_line_2__Market_Place <dbl> …
## $ Address_line_2__Romsey_Road <dbl> …
## $ Address_line_2__Royal_Parade <dbl> …
## $ Address_line_2__Station_Road <dbl> …
## $ `Address_line_2__-OTHER` <dbl> …
## $ `Village,_Town_or_City__Aberdeen` <dbl> …
## $ `Village,_Town_or_City__Birmingham` <dbl> …
## $ `Village,_Town_or_City__Eastbourne` <dbl> …
## $ `Village,_Town_or_City__Edinburgh` <dbl> …
## $ `Village,_Town_or_City__Fareham` <dbl> …
## $ `Village,_Town_or_City__Glasgow` <dbl> …
## $ `Village,_Town_or_City__Halifax` <dbl> …
## $ `Village,_Town_or_City__London` <dbl> …
## $ `Village,_Town_or_City__Manchester` <dbl> …
## $ `Village,_Town_or_City__Newcastle_upon_Tyne` <dbl> …
## $ `Village,_Town_or_City__Nottingham` <dbl> …
## $ `Village,_Town_or_City__Salisbury` <dbl> …
## $ `Village,_Town_or_City__Sheffield` <dbl> …
## $ `Village,_Town_or_City__Stratford-upon-Avon` <dbl> …
## $ `Village,_Town_or_City__Telford` <dbl> …
## $ `Village,_Town_or_City__Warwick` <dbl> …
## $ `Village,_Town_or_City__Winchester` <dbl> …
## $ `Village,_Town_or_City__Worcester` <dbl> …
## $ `Village,_Town_or_City__-OTHER` <dbl> …
## $ Postcode__BN22_7AQ <dbl> …
## $ Postcode__NE1_4JA <dbl> …
## $ Postcode__SO23_8TS <dbl> …
## $ `Postcode__-OTHER` <dbl> …
## $ `Latitude__-Inf_51.500426` <dbl> …
## $ Latitude__51.500426_52.455527 <dbl> …
## $ Latitude__52.455527_53.976356 <dbl> …
## $ Latitude__53.976356_Inf <dbl> …
## $ `Longitude__-Inf_-2.715617` <dbl> …
## $ `Longitude__-2.715617_-1.799645` <dbl> …
## $ `Longitude__-1.799645_-0.750569` <dbl> …
## $ `Longitude__-0.750569_Inf` <dbl> …
## $ `Admin_area__/England/London_(English_Region)/Camden_(London_Borough)` <dbl> …
## $ `Admin_area__/England/North_East_(English_Region)/North_East_(English_CA)/Newcastle_upon_Tyne_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/North_West_(English_Region)/Greater_Manchester_(English_CA)/Manchester_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/South_East_(English_Region)/East_Sussex_(English_County)/Eastbourne_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/South_East_(English_Region)/Hampshire_(English_County)/Winchester_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Wiltshire_(English_UA)` <dbl> …
## $ `Admin_area__/England/West_Midlands_(English_Region)/Telford_and_Wrekin_(English_UA)` <dbl> …
## $ `Admin_area__/England/West_Midlands_(English_Region)/Warwickshire_(English_County)/Stratford-on-Avon_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/West_Midlands_(English_Region)/Warwickshire_(English_County)/Warwick_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/West_Midlands_(English_Region)/West_Midlands_(English_CA)/Birmingham_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/Yorkshire_and_The_Humber_(English_Region)/Sheffield_City_Region_(English_CA)/Sheffield_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/Yorkshire_and_The_Humber_(English_Region)/West_Yorkshire_(English_CA)/Calderdale_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/Scotland/Aberdeenshire_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__/Scotland/City_of_Edinburgh_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__/Scotland/Fife_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__-OTHER` <dbl> …
## $ Accreditation__Accredited <dbl> …
## $ Accreditation__Unaccredited <dbl> …
## $ `Governance__Government-Local_Authority` <dbl> …
## $ `Governance__Government-National` <dbl> …
## $ `Governance__Independent-Not_for_profit` <dbl> …
## $ Governance__University <dbl> …
## $ `Governance__-OTHER` <dbl> …
## $ Size__large <dbl> …
## $ Size__medium <dbl> …
## $ Size__small <dbl> …
## $ `Size__-OTHER` <dbl> …
## $ Size_provenance__domus <dbl> …
## $ Size_provenance__mm_prediction_random_forest <dbl> …
## $ Size_provenance__visitbritain <dbl> …
## $ `Size_provenance__-OTHER` <dbl> …
## $ `Subject_Matter__Archaeology-Prehistory` <dbl> …
## $ `Subject_Matter__Arts-Ceramics` <dbl> …
## $ `Subject_Matter__Arts-Fine_and_decorative_arts` <dbl> …
## $ `Subject_Matter__Buildings-Houses-Large_houses` <dbl> …
## $ `Subject_Matter__Buildings-Houses-Medium_houses` <dbl> …
## $ `Subject_Matter__Buildings-Other` <dbl> …
## $ `Subject_Matter__Industry_and_manufacture-Metals` <dbl> …
## $ `Subject_Matter__Industry_and_manufacture-Mining_and_quarrying` <dbl> …
## $ `Subject_Matter__Industry_and_manufacture-Mixed` <dbl> …
## $ Subject_Matter__Local_Histories <dbl> …
## $ `Subject_Matter__Mixed-Encyclopaedic` <dbl> …
## $ `Subject_Matter__Mixed-Other` <dbl> …
## $ `Subject_Matter__Natural_world-Geology` <dbl> …
## $ Subject_Matter__Other <dbl> …
## $ `Subject_Matter__Personality-Literary` <dbl> …
## $ `Subject_Matter__Personality-Music` <dbl> …
## $ `Subject_Matter__Personality-Other` <dbl> …
## $ `Subject_Matter__Rural_Industry-Farming` <dbl> …
## $ `Subject_Matter__Rural_Industry-Watermills` <dbl> …
## $ `Subject_Matter__Rural_Industry-Windmills` <dbl> …
## $ `Subject_Matter__Sea_and_seafaring-Mixed` <dbl> …
## $ `Subject_Matter__Transport-Canals` <dbl> …
## $ `Subject_Matter__Transport-Trains_and_railways` <dbl> …
## $ `Subject_Matter__War_and_conflict-Regiment` <dbl> …
## $ `Subject_Matter__-OTHER` <dbl> …
## $ `Year_opened__1892:1892` <dbl> …
## $ `Year_opened__1901:1901` <dbl> …
## $ `Year_opened__1927:1927` <dbl> …
## $ `Year_opened__1930:1930` <dbl> …
## $ `Year_opened__1932:1932` <dbl> …
## $ `Year_opened__1934:1934` <dbl> …
## $ `Year_opened__1935:1935` <dbl> …
## $ `Year_opened__1963:1963` <dbl> …
## $ `Year_opened__1964:1964` <dbl> …
## $ `Year_opened__1967:1967` <dbl> …
## $ `Year_opened__1969:1969` <dbl> …
## $ `Year_opened__1972:1972` <dbl> …
## $ `Year_opened__1973:1973` <dbl> …
## $ `Year_opened__1974:1974` <dbl> …
## $ `Year_opened__1975:1975` <dbl> …
## $ `Year_opened__1976:1976` <dbl> …
## $ `Year_opened__1977:1977` <dbl> …
## $ `Year_opened__1978:1978` <dbl> …
## $ `Year_opened__1979:1979` <dbl> …
## $ `Year_opened__1980:1980` <dbl> …
## $ `Year_opened__1981:1981` <dbl> …
## $ `Year_opened__1982:1982` <dbl> …
## $ `Year_opened__1983:1983` <dbl> …
## $ `Year_opened__1984:1984` <dbl> …
## $ `Year_opened__1985:1985` <dbl> …
## $ `Year_opened__1986:1986` <dbl> …
## $ `Year_opened__1988:1988` <dbl> …
## $ `Year_opened__1989:1989` <dbl> …
## $ `Year_opened__1990:1990` <dbl> …
## $ `Year_opened__1992:1992` <dbl> …
## $ `Year_opened__1993:1993` <dbl> …
## $ `Year_opened__1996:1996` <dbl> …
## $ `Year_opened__-OTHER` <dbl> …
## $ `Year_closed__2017:2017` <dbl> …
## $ `Year_closed__9999:9999` <dbl> …
## $ `Year_closed__-OTHER` <dbl> …
## $ DOMUS_Subject_Matter__agriculture <dbl> …
## $ DOMUS_Subject_Matter__archaeology <dbl> …
## $ DOMUS_Subject_Matter__archives <dbl> …
## $ DOMUS_Subject_Matter__arms_and_armour <dbl> …
## $ DOMUS_Subject_Matter__biology_and_natural_history <dbl> …
## $ DOMUS_Subject_Matter__costume_and_textiles <dbl> …
## $ DOMUS_Subject_Matter__decorative_and_applied_arts <dbl> …
## $ DOMUS_Subject_Matter__fine_art <dbl> …
## $ DOMUS_Subject_Matter__maritime <dbl> …
## $ DOMUS_Subject_Matter__military <dbl> …
## $ DOMUS_Subject_Matter__mixed_collection <dbl> …
## $ DOMUS_Subject_Matter__personalia <dbl> …
## $ DOMUS_Subject_Matter__science_and_industry <dbl> …
## $ DOMUS_Subject_Matter__social_history <dbl> …
## $ DOMUS_Subject_Matter__transport <dbl> …
## $ `DOMUS_Subject_Matter__-OTHER` <dbl> …
## $ `DOMUS_identifier__-Inf_476` <dbl> …
## $ DOMUS_identifier__476_953 <dbl> …
## $ DOMUS_identifier__953_1329 <dbl> …
## $ DOMUS_identifier__1329_Inf <dbl> …
## $ Primary_provenance_of_data__domus <dbl> …
## $ `Primary_provenance_of_data__-OTHER` <dbl> …
## $ Identifier_used_in_primary_data_source__EM000005 <dbl> …
## $ `Identifier_used_in_primary_data_source__-OTHER` <dbl> …
## $ Area_Deprivation_index__1 <dbl> …
## $ Area_Deprivation_index__2 <dbl> …
## $ Area_Deprivation_index__3 <dbl> …
## $ Area_Deprivation_index__4 <dbl> …
## $ Area_Deprivation_index__5 <dbl> …
## $ Area_Deprivation_index__6 <dbl> …
## $ Area_Deprivation_index__7 <dbl> …
## $ Area_Deprivation_index__8 <dbl> …
## $ Area_Deprivation_index__9 <dbl> …
## $ Area_Deprivation_index__10 <dbl> …
## $ Area_Deprivation_index_crime__1 <dbl> …
## $ Area_Deprivation_index_crime__2 <dbl> …
## $ Area_Deprivation_index_crime__3 <dbl> …
## $ Area_Deprivation_index_crime__4 <dbl> …
## $ Area_Deprivation_index_crime__5 <dbl> …
## $ Area_Deprivation_index_crime__6 <dbl> …
## $ Area_Deprivation_index_crime__7 <dbl> …
## $ Area_Deprivation_index_crime__8 <dbl> …
## $ Area_Deprivation_index_crime__9 <dbl> …
## $ Area_Deprivation_index_crime__10 <dbl> …
## $ Area_Deprivation_index_education__1 <dbl> …
## $ Area_Deprivation_index_education__2 <dbl> …
## $ Area_Deprivation_index_education__3 <dbl> …
## $ Area_Deprivation_index_education__4 <dbl> …
## $ Area_Deprivation_index_education__5 <dbl> …
## $ Area_Deprivation_index_education__6 <dbl> …
## $ Area_Deprivation_index_education__7 <dbl> …
## $ Area_Deprivation_index_education__8 <dbl> …
## $ Area_Deprivation_index_education__9 <dbl> …
## $ Area_Deprivation_index_education__10 <dbl> …
## $ Area_Deprivation_index_employment__1 <dbl> …
## $ Area_Deprivation_index_employment__2 <dbl> …
## $ Area_Deprivation_index_employment__3 <dbl> …
## $ Area_Deprivation_index_employment__4 <dbl> …
## $ Area_Deprivation_index_employment__5 <dbl> …
## $ Area_Deprivation_index_employment__6 <dbl> …
## $ Area_Deprivation_index_employment__7 <dbl> …
## $ Area_Deprivation_index_employment__8 <dbl> …
## $ Area_Deprivation_index_employment__9 <dbl> …
## $ Area_Deprivation_index_employment__10 <dbl> …
## $ Area_Deprivation_index_health__1 <dbl> …
## $ Area_Deprivation_index_health__2 <dbl> …
## $ Area_Deprivation_index_health__3 <dbl> …
## $ Area_Deprivation_index_health__4 <dbl> …
## $ Area_Deprivation_index_health__5 <dbl> …
## $ Area_Deprivation_index_health__6 <dbl> …
## $ Area_Deprivation_index_health__7 <dbl> …
## $ Area_Deprivation_index_health__8 <dbl> …
## $ Area_Deprivation_index_health__9 <dbl> …
## $ Area_Deprivation_index_health__10 <dbl> …
## $ Area_Deprivation_index_housing__1 <dbl> …
## $ Area_Deprivation_index_housing__2 <dbl> …
## $ Area_Deprivation_index_housing__3 <dbl> …
## $ Area_Deprivation_index_housing__4 <dbl> …
## $ Area_Deprivation_index_housing__5 <dbl> …
## $ Area_Deprivation_index_housing__6 <dbl> …
## $ Area_Deprivation_index_housing__7 <dbl> …
## $ Area_Deprivation_index_housing__8 <dbl> …
## $ Area_Deprivation_index_housing__9 <dbl> …
## $ Area_Deprivation_index_housing__10 <dbl> …
## $ Area_Deprivation_index_income__1 <dbl> …
## $ Area_Deprivation_index_income__2 <dbl> …
## $ Area_Deprivation_index_income__3 <dbl> …
## $ Area_Deprivation_index_income__4 <dbl> …
## $ Area_Deprivation_index_income__5 <dbl> …
## $ Area_Deprivation_index_income__6 <dbl> …
## $ Area_Deprivation_index_income__7 <dbl> …
## $ Area_Deprivation_index_income__8 <dbl> …
## $ Area_Deprivation_index_income__9 <dbl> …
## $ Area_Deprivation_index_income__10 <dbl> …
## $ Area_Deprivation_index_services__1 <dbl> …
## $ Area_Deprivation_index_services__2 <dbl> …
## $ Area_Deprivation_index_services__3 <dbl> …
## $ Area_Deprivation_index_services__4 <dbl> …
## $ Area_Deprivation_index_services__5 <dbl> …
## $ Area_Deprivation_index_services__6 <dbl> …
## $ Area_Deprivation_index_services__7 <dbl> …
## $ Area_Deprivation_index_services__8 <dbl> …
## $ Area_Deprivation_index_services__9 <dbl> …
## $ Area_Deprivation_index_services__10 <dbl> …
## $ Area_Geodemographic_group__Country_Living <dbl> …
## $ Area_Geodemographic_group__English_and_Welsh_Countryside <dbl> …
## $ Area_Geodemographic_group__Ethnically_Diverse_Metropolitan_Living <dbl> …
## $ Area_Geodemographic_group__Larger_Towns_and_Cities <dbl> …
## $ Area_Geodemographic_group__London_Cosmopolitan <dbl> …
## $ Area_Geodemographic_group__Manufacturing_Traits <dbl> …
## $ Area_Geodemographic_group__Northern_Ireland_Countryside <dbl> …
## $ Area_Geodemographic_group__Remoter_Coastal_Living <dbl> …
## $ `Area_Geodemographic_group__Rural-Urban_Fringe` <dbl> …
## $ Area_Geodemographic_group__Scottish_Countryside <dbl> …
## $ Area_Geodemographic_group__Scottish_Industrial_Heritage <dbl> …
## $ Area_Geodemographic_group__Services_Manufacturing_and_Mining_Legacy <dbl> …
## $ Area_Geodemographic_group__Suburban_Traits <dbl> …
## $ Area_Geodemographic_group__Thriving_Rural <dbl> …
## $ Area_Geodemographic_group__Town_Living <dbl> …
## $ Area_Geodemographic_group__University_Towns_and_Cities <dbl> …
## $ Area_Geodemographic_group_code__1ar <dbl> …
## $ Area_Geodemographic_group_code__1br <dbl> …
## $ Area_Geodemographic_group_code__2ar <dbl> …
## $ Area_Geodemographic_group_code__2br <dbl> …
## $ Area_Geodemographic_group_code__3ar <dbl> …
## $ Area_Geodemographic_group_code__3br <dbl> …
## $ Area_Geodemographic_group_code__3cr <dbl> …
## $ Area_Geodemographic_group_code__4ar <dbl> …
## $ Area_Geodemographic_group_code__5ar <dbl> …
## $ Area_Geodemographic_group_code__6ar <dbl> …
## $ Area_Geodemographic_group_code__6br <dbl> …
## $ Area_Geodemographic_group_code__7ar <dbl> …
## $ Area_Geodemographic_group_code__7br <dbl> …
## $ Area_Geodemographic_group_code__7cr <dbl> …
## $ Area_Geodemographic_group_code__8ar <dbl> …
## $ Area_Geodemographic_group_code__8br <dbl> …
## $ Area_Geodemographic_subgroup__Affluent_rural <dbl> …
## $ Area_Geodemographic_subgroup__Ageing_Coastal_Living <dbl> …
## $ Area_Geodemographic_subgroup__Country_Living <dbl> …
## $ Area_Geodemographic_subgroup__Ethnically_Diverse_Metropolitan_Living <dbl> …
## $ Area_Geodemographic_subgroup__Expanding_Areas <dbl> …
## $ `Area_Geodemographic_subgroup__Industrial_and_Multi-ethnic` <dbl> …
## $ Area_Geodemographic_subgroup__Larger_Towns_and_Cities <dbl> …
## $ Area_Geodemographic_subgroup__London_Cosmopolitan <dbl> …
## $ Area_Geodemographic_subgroup__Manufacturing_Legacy <dbl> …
## $ Area_Geodemographic_subgroup__Mining_Legacy <dbl> …
## $ Area_Geodemographic_subgroup__Northern_Ireland_Countryside <dbl> …
## $ Area_Geodemographic_subgroup__Older_Farming_Communities <dbl> …
## $ Area_Geodemographic_subgroup__Prosperous_Towns <dbl> …
## $ Area_Geodemographic_subgroup__Rural_Growth_Areas <dbl> …
## $ `Area_Geodemographic_subgroup__Rural-Urban_Fringe` <dbl> …
## $ Area_Geodemographic_subgroup__Scottish_Countryside <dbl> …
## $ Area_Geodemographic_subgroup__Scottish_Industrial_Legacy <dbl> …
## $ Area_Geodemographic_subgroup__Service_Economy <dbl> …
## $ Area_Geodemographic_subgroup__Sparse_English_and_Welsh_Countryside <dbl> …
## $ Area_Geodemographic_subgroup__University_Towns_and_Cities <dbl> …
## $ Area_Geodemographic_subgroup__Urban_Living <dbl> …
## $ `Area_Geodemographic_subgroup__-OTHER` <dbl> …
## $ Area_Geodemographic_subgroup_code__1a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__1b1r <dbl> …
## $ Area_Geodemographic_subgroup_code__1b2r <dbl> …
## $ Area_Geodemographic_subgroup_code__2a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__2b1r <dbl> …
## $ Area_Geodemographic_subgroup_code__3a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__3a2r <dbl> …
## $ Area_Geodemographic_subgroup_code__3b1r <dbl> …
## $ Area_Geodemographic_subgroup_code__3c1r <dbl> …
## $ Area_Geodemographic_subgroup_code__4a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__5a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__6a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__6a2r <dbl> …
## $ Area_Geodemographic_subgroup_code__6a3r <dbl> …
## $ Area_Geodemographic_subgroup_code__6b1r <dbl> …
## $ Area_Geodemographic_subgroup_code__7a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__7b1r <dbl> …
## $ Area_Geodemographic_subgroup_code__7c2r <dbl> …
## $ Area_Geodemographic_subgroup_code__8a1r <dbl> …
## $ Area_Geodemographic_subgroup_code__8a2r <dbl> …
## $ Area_Geodemographic_subgroup_code__8b2r <dbl> …
## $ `Area_Geodemographic_subgroup_code__-OTHER` <dbl> …
## $ Area_Geodemographic_supergroup__Affluent_England <dbl> …
## $ Area_Geodemographic_supergroup__Business_Education_and_Heritage_Centres <dbl> …
## $ Area_Geodemographic_supergroup__Countryside_Living <dbl> …
## $ Area_Geodemographic_supergroup__Ethnically_Diverse_Metropolitan_Living <dbl> …
## $ Area_Geodemographic_supergroup__London_Cosmopolitan <dbl> …
## $ Area_Geodemographic_supergroup__Services_and_Industrial_Legacy <dbl> …
## $ Area_Geodemographic_supergroup__Town_and_Country_Living <dbl> …
## $ Area_Geodemographic_supergroup__Urban_Settlements <dbl> …
## $ Area_Geodemographic_supergroup_code__1r <dbl> …
## $ Area_Geodemographic_supergroup_code__2r <dbl> …
## $ Area_Geodemographic_supergroup_code__3r <dbl> …
## $ Area_Geodemographic_supergroup_code__4r <dbl> …
## $ Area_Geodemographic_supergroup_code__5r <dbl> …
## $ Area_Geodemographic_supergroup_code__6r <dbl> …
## $ Area_Geodemographic_supergroup_code__7r <dbl> …
## $ Area_Geodemographic_supergroup_code__8r <dbl> …
## $ Notes__Lost_accreditation_2016 <dbl> …
## $ Notes__Lost_accreditation_2017 <dbl> …
## $ `Notes__Premises_provided_by_the_Local_Authority_(Amot_Museums)` <dbl> …
## $ `Notes__Premises_provided_by_the_MoD._Museum_receives_funding_from_the_MoD_(Amot_Museums)` <dbl> …
## $ `Notes__-OTHER` <dbl> …
# Step 2: correlate
# Correlate every binarized feature with the "Accredited" indicator column.
data_correlation <- correlate(
  data_binarized,
  target = Accreditation__Accredited
)

data_correlation
## # A tibble: 338 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 Accreditation Accredited 1
## 2 Accreditation Unaccredited -1
## 3 Year_closed 9999:9999 0.486
## 4 Year_closed -OTHER -0.430
## 5 Notes Lost_accreditation_2016 -0.309
## 6 Notes -OTHER 0.206
## 7 Year_closed 2017:2017 -0.206
## 8 Year_opened 1993:1993 -0.206
## 9 Size small -0.206
## 10 Longitude -0.750569_Inf -0.167
## # ℹ 328 more rows
# Step 3: Plot
# Draw the correlation funnel from the feature/bin correlations.
correlationfunnel::plot_correlation_funnel(data_correlation)
## Warning: ggrepel: 328 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
This is last week’s Apply to your Data 5 with a new dataset.