INFO6270 - Lab 4

Author

Veronica Kerrigan

Published

February 26, 2025

Dataset

Importing and cleaning the dataset

library(readxl)
library(tidyverse)

# Directory holding the downloaded BFI weekend box-office reports.
bfi_report_dir <- "~/Desktop/BFI_weekend_office"

# Read one BFI weekend report and keep only rows whose `...7` value is
# exactly 1.
# NOTE(review): `...7` looks like the "weeks on release" column, so this keeps
# films on their opening weekend -- confirm against the spreadsheet.
#
# file_name: name of the .xls report inside `bfi_report_dir`.
# Returns the spreadsheet's columns 2-5 and 7-8 with the column names
# read_excel() assigned, so later steps see the same layout as before.
read_opening_weekend <- function(file_name) {
  report <- read_excel(file.path(bfi_report_dir, file_name))

  report[, c(2:5, 7:8)] %>%
    # Coerce to text so the four weekly tables always share a column type.
    mutate(...7 = str_trim(as.character(...7))) %>%
    # Anchored match: stripping "1<digit>" and then looking for any "1" would
    # still keep values such as "21", "31" or "101".
    filter(str_detect(...7, "^1(\\.0+)?$"))
}

week_one_messy <- read_opening_weekend(
  "bfi-weekend-box-office-report-2025-01-03-06.xls"
)

week_two_messy <- read_opening_weekend(
  "bfi-weekend-box-office-report-2025-01-10-12.xls"
)

week_three_messy <- read_opening_weekend(
  "bfi-weekend-box-office-report-2025-01-17-19.xls"
)

week_four_messy <- read_opening_weekend(
  "bfi-weekend-box-office-report-2025-01-24-26.xls"
)

# Stack the four weekly extracts into one table, then keep and name the five
# analysis columns in a single step. The `...N` names are the placeholder
# column names present on the stacked table; the unused `...5` column is
# dropped.
jan_uk_cinemas_opening_weekend <- bind_rows(
  week_one_messy,
  week_two_messy,
  week_three_messy,
  week_four_messy
) %>%
  select(
    films = ...1,
    country_of_origin = ...2,
    distributor = ...4,
    cinemas = ...6,
    weekend_gross = ...3
  )

# Country names and abbreviations found in the reports, mapped to ISO 3166-1
# alpha-3 codes. Order matters: a full name must come before its abbreviation
# ("Denmark" before "Den", "Germany" before "Ger").
country_codes <- c(
  "India" = "IND",
  "France" = "FRA",
  "Canada" = "CAN",
  "China" = "CHN",
  "Denmark" = "DNK",
  "Den" = "DNK",
  "Germany" = "DEU",
  "Ger" = "DEU",
  "Belgium" = "BEL",
  "Estonia" = "EST",
  "Hong Kong" = "HKG",
  "Italy" = "ITA",
  "Turkey" = "TUR",
  "Switzerland" = "CHE",
  "South Korea" = "KOR",
  "Lithuania" = "LTU",
  "Netherlands" = "NLD",
  "Hungary" = "HUN",
  "New Zealand" = "NZL"
)

# Replace the first occurrence of each known country name in `x` with its
# code. Only the first occurrence is replaced; a repeated "/France" is removed
# by the caller afterwards.
recode_countries <- function(x) {
  for (country in names(country_codes)) {
    x <- str_replace(x, country, country_codes[[country]])
  }
  x
}

jan_uk_cinemas_opening_weekend <- jan_uk_cinemas_opening_weekend %>%
  mutate(
    country_of_origin = country_of_origin %>%
      recode_countries() %>%
      # Upper-case whatever was not recoded so all codes share one case.
      str_to_upper() %>%
      # Drop a duplicate France entry left behind by first-occurrence recoding.
      str_remove("/FRANCE") %>%
      # "NED" is an abbreviation for the Netherlands; align it with NLD so one
      # country does not appear under two codes.
      str_replace_all("\\bNED\\b", "NLD"),
    # Remove the bracketed note from the title, then trim the space it leaves.
    films = str_trim(str_remove(films, "\\(.*\\)"))
  )

# Coerce the gross-earnings and cinema-count columns to numeric in one pass.
jan_uk_cinemas_opening_weekend <- mutate(
  jan_uk_cinemas_opening_weekend,
  across(c(weekend_gross, cinemas), as.numeric)
)
  
# Sort the films from lowest to highest weekend gross. Calling arrange()
# directly replaces the earlier mutate(arrange(...)) form, which only worked
# because mutate() overwrote every column with its sorted copy.
jan_uk_cinemas_opening_weekend <- jan_uk_cinemas_opening_weekend %>%
  arrange(weekend_gross)

Overview of the dataset

Gross Weekend Earnings: UK Cinemas
films country_of_origin distributor cinemas weekend_gross
Kudumbasthan IND 2G Entertainments 1 3
Putin POL Kinostar 1 123
The People Before UK Miracle/Sea High Productions 11 206
Vanangaan IND 2G Entertainments 1 252
Sampiyonlar TUR Cinedex OG 3 295
Captain Kronos: Vampire Hunter UK Verve/Hammer 1 364
Karantina TUR Cinedex OG 3 480
Choo Mantar IND 2G Entertainments 6 637
Communist Pacha Adhava Appa IND 2G Entertainments 10 785
Chaalchitro: The Frame Fatale IND Bollywood Films 14 831

Variables summary

Below is a table which displays the variable types for the films which had a UK opening weekend in January 2025.

Variables in the UK Cinemas Opening Weekend Dataset
Variable Type Description
films Nominal Films
country_of_origin Nominal Country of Origin
distributor Nominal Films Distributor
cinemas Continuous Number of UK Cinemas
weekend_gross Continuous Weekend Gross Earnings, in pounds

Frequency tables

The first table shows the frequency of film distribution companies for films with a January 2025 UK opening weekend. The second table displays the countries of origin for those same films, ordered from least to most common.

Frequency of Film Distributor
distributor freq pct
AA Films UK 1 1.4
AF-Media 1 1.4
Big Films 1 1.4
Blue Eyes Films 1 1.4
Bollywood Films 1 1.4
Central City Media 1 1.4
CinemaLive 1 1.4
Entertainment 1 1.4
Independent 1 1.4
Jura 1 1.4
Kinostar 1 1.4
Lionsgate 1 1.4
MUBI 1 1.4
Magnetes 1 1.4
Masala Life 1 1.4
Miracle/Dazzler 1 1.4
Miracle/Sea High Productions 1 1.4
Modern Films 1 1.4
Parkland Entertainment 1 1.4
Trafalgar 1 1.4
Trinity/Cine Asia 1 1.4
Vertical/Miracle 1 1.4
Vertigo 1 1.4
Verve/Hammer 1 1.4
Vue Entertainment 1 1.4
Altitude 2 2.8
BFI 2 2.8
CMC Pictures 2 2.8
Curzon/Amazon MGM 2 2.8
DG Tech 2 2.8
Disney 2 2.8
Magic Light Pictures 2 2.8
Moviegoers 2 2.8
StudioCanal 2 2.8
Warner Bros 2 2.8
Zee Studios 2 2.8
Cinedex OG 3 4.2
RFT Films 3 4.2
Universal 3 4.2
Dreamz Entertainment 5 7.0
2G Entertainments 10 14.1
Frequency of Country of Origin
country_of_origin freq pct
CAN/UK 1 1.4
CHE/ITA/UK 1 1.4
CHN/HKG 1 1.4
DNK/POL/SWE 1 1.4
EST/DNK/DEU/NLD/CHN/FRA 1 1.4
FRA 1 1.4
FRA/BEL/ITA 1 1.4
FRA/USA 1 1.4
HKG 1 1.4
IND/UK 1 1.4
ITA/DEU/CHL/USA 1 1.4
ITA/FRA 1 1.4
KOR 1 1.4
LTU 1 1.4
UK/CAN/USA 1 1.4
UK/FRA 1 1.4
UK/IRE/ICE/BEL 1 1.4
UK/USA/FRA/DEU 1 1.4
USA/HUN/UK 1 1.4
USA/NED 1 1.4
USA/NZL 1 1.4
USA/POL 1 1.4
USA/UK 1 1.4
USA/UK/CZE 1 1.4
CHN 2 2.8
POL 2 2.8
TUR 4 5.6
UK 6 8.5
USA 6 8.5
IND 27 38.0

Descriptive statistics

This table shows descriptive statistics for the opening-weekend earnings and the number of cinemas for films showing in the UK.

Cinema and Weekend Gross Earnings Statistics
variable n mean sd var q1 median q3 min max
cinemas 71 109.7606 182.4225 3.327798e+04 7.5 28 100.5 1 696
weekend_gross 71 249766.0845 778910.2996 6.067013e+11 2220.5 13034 65031.0 3 5246910