# Load the TidyTuesday (2022-11-22) museums data set straight from GitHub.
museums <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv"
)
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Summarise every column (missingness, uniqueness, basic distribution) to
# decide which variables are usable as features.
museums %>% skimr::skim()
Name | Piped data |
Number of rows | 4191 |
Number of columns | 35 |
_______________________ | |
Column type frequency: | |
character | 24 |
numeric | 11 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
museum_id | 0 | 1.00 | 8 | 15 | 0 | 4191 | 0 |
Name_of_museum | 0 | 1.00 | 3 | 76 | 0 | 4190 | 0 |
Address_line_1 | 441 | 0.89 | 3 | 61 | 0 | 3212 | 0 |
Address_line_2 | 2816 | 0.33 | 3 | 39 | 0 | 1167 | 0 |
Village,_Town_or_City | 4 | 1.00 | 3 | 24 | 0 | 1696 | 0 |
Postcode | 0 | 1.00 | 6 | 9 | 0 | 3918 | 0 |
Admin_area | 0 | 1.00 | 12 | 137 | 0 | 393 | 0 |
Accreditation | 0 | 1.00 | 10 | 12 | 0 | 2 | 0 |
Governance | 0 | 1.00 | 7 | 41 | 0 | 13 | 0 |
Size | 0 | 1.00 | 4 | 7 | 0 | 5 | 0 |
Size_provenance | 179 | 0.96 | 2 | 29 | 0 | 16 | 0 |
Subject_Matter | 0 | 1.00 | 5 | 45 | 0 | 114 | 0 |
Year_opened | 0 | 1.00 | 9 | 9 | 0 | 351 | 0 |
Year_closed | 0 | 1.00 | 9 | 9 | 0 | 170 | 0 |
DOMUS_Subject_Matter | 2788 | 0.33 | 5 | 27 | 0 | 21 | 0 |
Primary_provenance_of_data | 0 | 1.00 | 3 | 8 | 0 | 18 | 0 |
Identifier_used_in_primary_data_source | 2056 | 0.51 | 2 | 8 | 0 | 2134 | 0 |
Area_Geodemographic_group | 49 | 0.99 | 11 | 40 | 0 | 17 | 0 |
Area_Geodemographic_group_code | 49 | 0.99 | 3 | 3 | 0 | 16 | 0 |
Area_Geodemographic_subgroup | 49 | 0.99 | 12 | 39 | 0 | 25 | 0 |
Area_Geodemographic_subgroup_code | 49 | 0.99 | 4 | 4 | 0 | 24 | 0 |
Area_Geodemographic_supergroup | 49 | 0.99 | 16 | 39 | 0 | 8 | 0 |
Area_Geodemographic_supergroup_code | 49 | 0.99 | 2 | 2 | 0 | 8 | 0 |
Notes | 2980 | 0.29 | 12 | 751 | 0 | 956 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Latitude | 0 | 1.00 | 52.93 | 2.09 | 49.18 | 51.48 | 52.47 | 53.96 | 100.00 | ▇▁▁▁▁ |
Longitude | 0 | 1.00 | -1.96 | 1.84 | -8.09 | -3.10 | -1.87 | -0.48 | 1.76 | ▁▂▇▇▅ |
DOMUS_identifier | 2347 | 0.44 | 1303.45 | 1597.19 | 1.00 | 486.50 | 991.50 | 1470.25 | 7746.00 | ▇▂▁▁▁ |
Area_Deprivation_index | 49 | 0.99 | 5.44 | 2.48 | 1.00 | 4.00 | 5.00 | 7.00 | 10.00 | ▃▆▇▆▃ |
Area_Deprivation_index_crime | 49 | 0.99 | 5.43 | 3.07 | 1.00 | 3.00 | 6.00 | 8.00 | 10.00 | ▇▆▅▇▇ |
Area_Deprivation_index_education | 49 | 0.99 | 6.04 | 2.61 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▃▅▇▇▆ |
Area_Deprivation_index_employment | 49 | 0.99 | 6.08 | 2.76 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▅▆▇▇▇ |
Area_Deprivation_index_health | 49 | 0.99 | 6.02 | 2.82 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▅▆▆▇▇ |
Area_Deprivation_index_housing | 49 | 0.99 | 3.97 | 2.75 | 1.00 | 1.00 | 3.00 | 6.00 | 10.00 | ▇▅▃▂▂ |
Area_Deprivation_index_income | 49 | 0.99 | 5.99 | 2.62 | 1.00 | 4.00 | 6.00 | 8.00 | 10.00 | ▃▆▇▇▆ |
Area_Deprivation_index_services | 49 | 0.99 | 4.78 | 3.01 | 1.00 | 2.00 | 4.00 | 7.00 | 10.00 | ▇▅▅▅▅ |
# Give the comma-containing column a syntactic name. The rename is done by
# name rather than by position, so it cannot hit the wrong column if the
# upstream column order changes; any_of() makes it a no-op when the chunk is
# re-run after the column has already been renamed.
museums <- museums %>%
  rename(any_of(c(Village_Town_or_City = "Village,_Town_or_City")))

# Names of the main categorical columns, in this order.
factors_vec <- museums %>%
  select(
    Name_of_museum,
    Village_Town_or_City,
    Accreditation,
    Governance,
    Subject_Matter
  ) %>%
  names()

# Build the modelling table:
#   1. turn every character column into a factor;
#   2. split Year_opened at ":" and keep only the first part (the second part
#      is dropped below as Y_o_duplicate);
#   3. drop identifiers, coordinates, free text and redundant area codes --
#      every column not listed here is kept, in its original order;
#   4. remove rows with a missing value in any remaining column.
# NOTE(review): DOMUS_Subject_Matter and Address_line_1 are retained and both
# contain many missing values, so step 4 discards a large share of the rows --
# confirm that keeping these two columns is intended.
museums_clean <- museums %>%
  mutate(across(where(is.character), factor)) %>%
  separate_wider_delim(
    Year_opened,
    delim = ":",
    names = c("Year_opened", "Y_o_duplicate")
  ) %>%
  select(
    -c(
      museum_id,
      Latitude,
      Longitude,
      Notes,
      Address_line_2,
      Postcode,
      DOMUS_identifier,
      Identifier_used_in_primary_data_source,
      Size_provenance,
      Y_o_duplicate,
      Year_closed,
      Area_Deprivation_index,
      Area_Geodemographic_group,
      Area_Geodemographic_group_code,
      Area_Geodemographic_subgroup_code,
      Area_Geodemographic_supergroup,
      Area_Geodemographic_supergroup_code
    )
  ) %>%
  na.omit()
# Inspect the column types and first values of the cleaned table.
glimpse(museums_clean)
## Rows: 1,370
## Columns: 19
## $ Name_of_museum <fct> "Prestongrange Industrial Heritage M…
## $ Address_line_1 <fct> "Prestongrange", "Chambers Street", …
## $ Village_Town_or_City <fct> "nr Prestonpans", "Edinburgh", "Cumn…
## $ Admin_area <fct> /Scotland/East Lothian (Scottish Cou…
## $ Accreditation <fct> Accredited, Accredited, Accredited, …
## $ Governance <fct> Government-Local_Authority, Governme…
## $ Size <fct> small, large, small, small, small, l…
## $ Subject_Matter <fct> Industry_and_manufacture-Mining_and_…
## $ Year_opened <chr> "1993", "1854", "1891", "1966", "198…
## $ DOMUS_Subject_Matter <fct> science and industry, mixed collecti…
## $ Primary_provenance_of_data <fct> domus, domus, domus, domus, domus, d…
## $ Area_Deprivation_index_crime <dbl> 3, 1, 1, 7, 10, 1, 6, 4, 9, 4, 7, 3,…
## $ Area_Deprivation_index_education <dbl> 7, 7, 4, 7, 6, 4, 3, 4, 8, 6, 7, 4, …
## $ Area_Deprivation_index_employment <dbl> 6, 6, 2, 7, 6, 4, 3, 6, 10, 4, 10, 4…
## $ Area_Deprivation_index_health <dbl> 8, 5, 2, 9, 5, 5, 5, 6, 10, 2, 10, 4…
## $ Area_Deprivation_index_housing <dbl> 9, 1, 6, 7, 7, 1, 10, 3, 2, 1, 10, 6…
## $ Area_Deprivation_index_income <dbl> 5, 7, 3, 8, 6, 6, 3, 6, 10, 5, 10, 4…
## $ Area_Deprivation_index_services <dbl> 3, 9, 9, 1, 1, 9, 9, 6, 1, 10, 2, 9,…
## $ Area_Geodemographic_subgroup <fct> Scottish Industrial Legacy, Larger T…
# Class balance of the target variable.
count(museums_clean, Accreditation)
## # A tibble: 2 × 2
## Accreditation n
## <fct> <int>
## 1 Accredited 1007
## 2 Unaccredited 363
# List the distinct subject-matter categories that remain after cleaning.
distinct(museums_clean, Subject_Matter)
## # A tibble: 98 × 1
## Subject_Matter
## <fct>
## 1 Industry_and_manufacture-Mining_and_quarrying
## 2 Mixed-Encyclopaedic
## 3 Local_Histories
## 4 War_and_conflict-Military
## 5 Personality-Explorer
## 6 Archaeology-Roman
## 7 Personality-Literary
## 8 Sea_and_seafaring-Fishing
## 9 Science_and_technology-Other
## 10 Sea_and_seafaring-Boats_and_ships
## # ℹ 88 more rows
# Summarise Subject_Matter alone to see its most frequent levels.
museums_clean %>% skimr::skim(Subject_Matter)
Name | Piped data |
Number of rows | 1370 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
factor | 1 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Subject_Matter | 0 | 1 | FALSE | 98 | Loc: 397, Art: 101, War: 95, Bui: 71 |
# Bar chart of the number of museums per accreditation status.
ggplot(museums_clean, aes(x = Accreditation)) +
  geom_bar()
Accreditation vs. Governance
# Accreditation and Governance are both categorical, so a boxplot has no
# continuous distribution to summarise. Show instead, for each governance
# type, the proportion of accredited vs. unaccredited museums.
museums_clean %>%
  ggplot(aes(y = Governance, fill = Accreditation)) +
  geom_bar(position = "fill") +
  labs(x = "Proportion of museums")
Correlation Plot
# Step 1: Binarize
# Drop the museum name, then convert the remaining columns into 0/1
# indicator columns.
museums_binarized <- binarize(select(museums_clean, -Name_of_museum))

glimpse(museums_binarized)
## Rows: 1,370
## Columns: 150
## $ Address_line_1__High_Street <dbl> …
## $ `Address_line_1__-OTHER` <dbl> …
## $ Village_Town_or_City__Edinburgh <dbl> …
## $ Village_Town_or_City__Leeds <dbl> …
## $ Village_Town_or_City__London <dbl> …
## $ Village_Town_or_City__Manchester <dbl> …
## $ Village_Town_or_City__York <dbl> …
## $ `Village_Town_or_City__-OTHER` <dbl> …
## $ `Admin_area__/England/London_(English_Region)/Camden_(London_Borough)` <dbl> …
## $ `Admin_area__/England/North_East_(English_Region)/North_East_(English_CA)/Northumberland_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/North_West_(English_Region)/Greater_Manchester_(English_CA)/Manchester_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Cornwall_(English_UA)` <dbl> …
## $ `Admin_area__/England/South_West_(English_Region)/Wiltshire_(English_UA)` <dbl> …
## $ `Admin_area__/England/Yorkshire_and_The_Humber_(English_Region)/West_Yorkshire_(English_CA)/Leeds_(English_District_or_Borough)` <dbl> …
## $ `Admin_area__/England/Yorkshire_and_The_Humber_(English_Region)/York_(English_UA)` <dbl> …
## $ `Admin_area__/Scotland/City_of_Edinburgh_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__/Scotland/Dumfries_and_Galloway_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__/Scotland/Highland_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__/Scotland/Scottish_Borders_(Scottish_Council_Area)` <dbl> …
## $ `Admin_area__-OTHER` <dbl> …
## $ Accreditation__Accredited <dbl> …
## $ Accreditation__Unaccredited <dbl> …
## $ `Governance__Government-Local_Authority` <dbl> …
## $ `Governance__Government-National` <dbl> …
## $ `Governance__Independent-English_Heritage` <dbl> …
## $ `Governance__Independent-National_Trust` <dbl> …
## $ `Governance__Independent-Not_for_profit` <dbl> …
## $ `Governance__Independent-Unknown` <dbl> …
## $ Governance__University <dbl> …
## $ `Governance__-OTHER` <dbl> …
## $ Size__large <dbl> …
## $ Size__medium <dbl> …
## $ Size__small <dbl> …
## $ `Size__-OTHER` <dbl> …
## $ `Subject_Matter__Archaeology-Roman` <dbl> …
## $ `Subject_Matter__Arts-Fine_and_decorative_arts` <dbl> …
## $ `Subject_Matter__Buildings-Houses-Large_houses` <dbl> …
## $ `Subject_Matter__Buildings-Houses-Medium_houses` <dbl> …
## $ `Subject_Matter__Buildings-Other` <dbl> …
## $ `Subject_Matter__Industry_and_manufacture-Mining_and_quarrying` <dbl> …
## $ Subject_Matter__Local_Histories <dbl> …
## $ `Subject_Matter__Mixed-Encyclopaedic` <dbl> …
## $ `Subject_Matter__Mixed-Other` <dbl> …
## $ `Subject_Matter__Personality-Literary` <dbl> …
## $ `Subject_Matter__Sea_and_seafaring-Mixed` <dbl> …
## $ `Subject_Matter__Transport-Trains_and_railways` <dbl> …
## $ `Subject_Matter__War_and_conflict-Castles_and_forts` <dbl> …
## $ `Subject_Matter__War_and_conflict-Regiment` <dbl> …
## $ `Subject_Matter__-OTHER` <dbl> …
## $ Year_opened__1927 <dbl> …
## $ Year_opened__1951 <dbl> …
## $ Year_opened__1961 <dbl> …
## $ Year_opened__1962 <dbl> …
## $ Year_opened__1963 <dbl> …
## $ Year_opened__1964 <dbl> …
## $ Year_opened__1966 <dbl> …
## $ Year_opened__1967 <dbl> …
## $ Year_opened__1968 <dbl> …
## $ Year_opened__1969 <dbl> …
## $ Year_opened__1972 <dbl> …
## $ Year_opened__1973 <dbl> …
## $ Year_opened__1974 <dbl> …
## $ Year_opened__1975 <dbl> …
## $ Year_opened__1976 <dbl> …
## $ Year_opened__1977 <dbl> …
## $ Year_opened__1978 <dbl> …
## $ Year_opened__1979 <dbl> …
## $ Year_opened__1980 <dbl> …
## $ Year_opened__1981 <dbl> …
## $ Year_opened__1982 <dbl> …
## $ Year_opened__1983 <dbl> …
## $ Year_opened__1984 <dbl> …
## $ Year_opened__1985 <dbl> …
## $ Year_opened__1986 <dbl> …
## $ Year_opened__1987 <dbl> …
## $ Year_opened__1988 <dbl> …
## $ Year_opened__1989 <dbl> …
## $ Year_opened__1990 <dbl> …
## $ Year_opened__1991 <dbl> …
## $ Year_opened__1992 <dbl> …
## $ Year_opened__1993 <dbl> …
## $ Year_opened__1994 <dbl> …
## $ `Year_opened__-OTHER` <dbl> …
## $ DOMUS_Subject_Matter__agriculture <dbl> …
## $ DOMUS_Subject_Matter__archaeology <dbl> …
## $ DOMUS_Subject_Matter__archives <dbl> …
## $ DOMUS_Subject_Matter__biology_and_natural_history <dbl> …
## $ DOMUS_Subject_Matter__costume_and_textiles <dbl> …
## $ DOMUS_Subject_Matter__decorative_and_applied_arts <dbl> …
## $ DOMUS_Subject_Matter__fine_art <dbl> …
## $ DOMUS_Subject_Matter__maritime <dbl> …
## $ DOMUS_Subject_Matter__military <dbl> …
## $ DOMUS_Subject_Matter__mixed_collection <dbl> …
## $ DOMUS_Subject_Matter__personalia <dbl> …
## $ DOMUS_Subject_Matter__science_and_industry <dbl> …
## $ DOMUS_Subject_Matter__social_history <dbl> …
## $ DOMUS_Subject_Matter__transport <dbl> …
## $ `DOMUS_Subject_Matter__-OTHER` <dbl> …
## $ Primary_provenance_of_data__domus <dbl> …
## $ `Primary_provenance_of_data__-OTHER` <dbl> …
## $ `Area_Deprivation_index_crime__-Inf_2` <dbl> …
## $ Area_Deprivation_index_crime__2_4 <dbl> …
## $ Area_Deprivation_index_crime__4_7 <dbl> …
## $ Area_Deprivation_index_crime__7_Inf <dbl> …
## $ `Area_Deprivation_index_education__-Inf_4` <dbl> …
## $ Area_Deprivation_index_education__4_6 <dbl> …
## $ Area_Deprivation_index_education__6_8 <dbl> …
## $ Area_Deprivation_index_education__8_Inf <dbl> …
## $ `Area_Deprivation_index_employment__-Inf_3` <dbl> …
## $ Area_Deprivation_index_employment__3_6 <dbl> …
## $ Area_Deprivation_index_employment__6_8 <dbl> …
## $ Area_Deprivation_index_employment__8_Inf <dbl> …
## $ `Area_Deprivation_index_health__-Inf_3` <dbl> …
## $ Area_Deprivation_index_health__3_5 <dbl> …
## $ Area_Deprivation_index_health__5_8 <dbl> …
## $ Area_Deprivation_index_health__8_Inf <dbl> …
## $ `Area_Deprivation_index_housing__-Inf_3` <dbl> …
## $ Area_Deprivation_index_housing__3_6 <dbl> …
## $ Area_Deprivation_index_housing__6_Inf <dbl> …
## $ `Area_Deprivation_index_income__-Inf_4` <dbl> …
## $ Area_Deprivation_index_income__4_6 <dbl> …
## $ Area_Deprivation_index_income__6_8 <dbl> …
## $ Area_Deprivation_index_income__8_Inf <dbl> …
## $ `Area_Deprivation_index_services__-Inf_3` <dbl> …
## $ Area_Deprivation_index_services__3_5 <dbl> …
## $ Area_Deprivation_index_services__5_8 <dbl> …
## $ Area_Deprivation_index_services__8_Inf <dbl> …
## $ Area_Geodemographic_subgroup__Affluent_rural <dbl> …
## $ Area_Geodemographic_subgroup__Ageing_Coastal_Living <dbl> …
## $ Area_Geodemographic_subgroup__Country_Living <dbl> …
## $ Area_Geodemographic_subgroup__Ethnically_Diverse_Metropolitan_Living <dbl> …
## $ Area_Geodemographic_subgroup__Expanding_Areas <dbl> …
## $ `Area_Geodemographic_subgroup__Industrial_and_Multi-ethnic` <dbl> …
## $ Area_Geodemographic_subgroup__Larger_Towns_and_Cities <dbl> …
## $ Area_Geodemographic_subgroup__London_Cosmopolitan <dbl> …
## $ Area_Geodemographic_subgroup__Manufacturing_Legacy <dbl> …
## $ Area_Geodemographic_subgroup__Mining_Legacy <dbl> …
## $ Area_Geodemographic_subgroup__Northern_Ireland_Countryside <dbl> …
## $ Area_Geodemographic_subgroup__Older_Farming_Communities <dbl> …
## $ Area_Geodemographic_subgroup__Prosperous_Towns <dbl> …
## $ `Area_Geodemographic_subgroup__Rural-Urban_Fringe` <dbl> …
## $ Area_Geodemographic_subgroup__Rural_Growth_Areas <dbl> …
## $ Area_Geodemographic_subgroup__Scottish_Countryside <dbl> …
## $ Area_Geodemographic_subgroup__Scottish_Industrial_Legacy <dbl> …
## $ Area_Geodemographic_subgroup__Seaside_Living <dbl> …
## $ Area_Geodemographic_subgroup__Service_Economy <dbl> …
## $ Area_Geodemographic_subgroup__Sparse_English_and_Welsh_Countryside <dbl> …
## $ Area_Geodemographic_subgroup__University_Towns_and_Cities <dbl> …
## $ Area_Geodemographic_subgroup__Urban_Living <dbl> …
## $ `Area_Geodemographic_subgroup__-OTHER` <dbl> …
# Step 2: Correlation
# Correlate every indicator column with the "Accredited" indicator.
museums_correlation <- correlate(
  museums_binarized,
  target = Accreditation__Accredited
)

glimpse(museums_correlation)
## Rows: 150
## Columns: 3
## $ feature <fct> Accreditation, Accreditation, Governance, Size, Year_opene…
## $ bin <chr> "Accredited", "Unaccredited", "Independent-Unknown", "smal…
## $ correlation <dbl> 1.00000000, -1.00000000, -0.19751976, -0.15704734, 0.13432…
# Step 3: Plot
# Draw the correlation funnel from the correlation table.
correlationfunnel::plot_correlation_funnel(museums_correlation)
## Warning: ggrepel: 113 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps