summary <- read_excel(
"raw_data/BridgeofAllanCharityBonfireNight_2021-11-08.xlsx",
sheet = "Daily Summary")
event1356 <- read_excel(
"raw_data/BridgeofAllanCharityBonfireNight_2021-11-08.xlsx",
sheet = "Event1356",
skip = 7) %>%
clean_names()
event1356 <- event1356[-1,]
uk_postcode <- read_csv("raw_data/ukpostcodes.csv")
combine <- left_join(event1356, uk_postcode, by = "postcode")
# mistake in dataframe had Jersey at north pole
uk_postcode[uk_postcode$postcode=="JE3 2DX", "latitude"] <- 49.2534
uk_postcode[uk_postcode$postcode=="JE3 2DX", "longitude"] <- -2.2494
non_uk_postcode <- data.frame(postcode = c("69005", "20159", "FK5 4",
"08015", "75018", "97330",
"FK8 -BO", "74960"),
latitude = c("45.7583", "45.4982", "56.0267",
"41.3786", "48.8913", "44.6385",
"56.1492", "35.7837"),
longitude = c("4.7991", "9.1918", "-3.8406",
"2.1524", "2.3530", "-123.2929",
"-4.3579", "-94.6037")) %>%
mutate(latitude = as.numeric(latitude),
longitude = as.numeric(longitude))
leaf_map <- event1356 %>%
mutate(postcode = str_to_upper(postcode),
postcode = trimws(postcode),
postcode = ifelse(nchar(postcode) < 6,
postcode,
ifelse(str_detect(postcode, "\\s"),
postcode,
paste0(substr(postcode, start = 1, stop = (nchar(postcode)-3)),
" ", substr(postcode, start = (nchar(postcode)-2),
stop = nchar(postcode))))),
postcode = case_when( postcode == "FK10 WTF" ~ "FK10 2TF",
postcode == "PH336 XE" ~ "PH33 6XE",
postcode == "FK9 5 JF" ~ "FK9 5JF",
postcode == "FK10 3 NR" ~ "FK10 3NR",
postcode == "ML9 1AQ" ~ "ML9 1AQ",
TRUE ~ postcode)
) %>%
left_join(uk_postcode, by = "postcode") %>%
left_join(non_uk_postcode, by = "postcode"
) %>%
mutate(latitude = coalesce(latitude.x, latitude.y),
longitude = coalesce(longitude.x, longitude.y)) %>%
select(purchased_on, postcode, adult_ticket,
child_ticket_age_5_16,concession_ticket,
family_discount_ticket_2_adults_and_2_children_5_16,
family_discount_ticket_2_adults_and_3_children_5_16,
student_bundle_4_x_students_with_id,
net_order_revenue_to_organiser, latitude, longitude)
Student Bundle - Is 4 students
Family Bundle - Is a mix of 2 Adults and either 2 or 3 Kids
leaf_map %>%
mutate(date = substr(purchased_on,
start = 6, stop = 10)) %>%
pivot_longer(c(3:8),
names_to = "ticket_type",
values_to = "ticket_count") %>%
mutate(
ticket_type = case_when(
ticket_type == "adult_ticket" ~ "Adult",
ticket_type == "child_ticket_age_5_16" ~ "Child",
ticket_type == "concession_ticket" ~ "Concession",
ticket_type == "family_discount_ticket_2_adults_and_2_children_5_16" ~ "Family Bundle",
ticket_type == "family_discount_ticket_2_adults_and_3_children_5_16" ~ "Family Bundle",
ticket_type == "student_bundle_4_x_students_with_id" ~ "Student Bundle",
TRUE ~ ticket_type)
) %>%
filter(!is.na(ticket_count)) %>%
select( ticket_count, ticket_type) %>%
group_by( ticket_type) %>%
summarise(ticket_count = sum(ticket_count), .groups = 'drop') %>%
ggplot(aes(y = ticket_type, x = ticket_count, fill = ticket_type)) +
geom_col(colour = "black") +
scale_fill_brewer(palette = "Reds") +
labs(title = "Ticket Sales",
y = "Ticket Type",
x = "Tickets Sold",
fill = "Ticket Type") +
theme(plot.title = element_text(hjust = 0.5))
Ticket sales by type predominantly Adult tickets being sold, normally we thought we had a higher number of kids but this will be mixed in with the Family Bundle numbers.
leaf_map %>%
mutate(date = substr(purchased_on,
start = 6, stop = 10)) %>%
pivot_longer(c(3:8),
names_to = "ticket_type",
values_to = "ticket_count") %>%
mutate(
ticket_type = case_when(
ticket_type == "adult_ticket" ~ "Adult",
ticket_type == "child_ticket_age_5_16" ~ "Child",
ticket_type == "concession_ticket" ~ "Concession",
ticket_type == "family_discount_ticket_2_adults_and_2_children_5_16" ~ "Family Bundle",
ticket_type == "family_discount_ticket_2_adults_and_3_children_5_16" ~ "Family Bundle",
ticket_type == "student_bundle_4_x_students_with_id" ~ "Student Bundle",
TRUE ~ ticket_type)
) %>%
filter(!is.na(ticket_count)) %>%
select(date, ticket_count, ticket_type) %>%
group_by(date, ticket_type) %>%
summarise(ticket_count = sum(ticket_count), .groups = 'drop') %>%
ggplot(aes(y = date, x = ticket_count, fill = ticket_type)) +
geom_col(colour = "black") +
scale_fill_brewer(palette = "Reds") +
labs(title = "Ticket Sales",
y = "Date",
x = "Tickets Sold",
fill = "Ticket Type") +
theme(plot.title = element_text(hjust = 0.5))
Showing the sales per day and per type. Steady sale thorugh the month, real ramp up of sales in the last few days near the event when people would have a better idea of the whether.
Location have been determined by the postcode give, and will be where the payment card is addressed. Will have some like students can be living in Stirling but their card is registered to another home address. The Glasgow COP26 was happening at the same time so we will have had more people from outside of the area staying in hotel.
leaf_map %>%
separate(postcode, c("postcode_area", "postcode_inner"), " ") %>%
count(postcode_area) %>%
rename(count = n) %>%
arrange(desc(count))
Sale per postcodes. In order Alloa, south Stirling, Bridge of Allan, Dunblane and north Stirling make up the top 5 postcodes.
leaf_map %>%
separate(postcode, c("postcode_outer", "postcode_inner"), " ") %>%
mutate(postcode_outer = str_extract(postcode_outer, "^[A-Z]{1,}"),
areas = case_when(postcode_outer == "FK" ~ "FK - Falkirk",
postcode_outer == "G" ~ "G - Glasgow",
postcode_outer == "PH" ~ "PH - Perth",
postcode_outer == "EH" ~ "EH - Edinburgh",
postcode_outer == "ML" ~ "ML - Motherwell",
postcode_outer == "KY" ~ "KY - Kirkaldy",
postcode_outer == "DD" ~ "DD - Dundee",
is.na(postcode_outer) ~ "Non-UK",
postcode_outer == "AB" ~ "AB - Aberdeen",
postcode_outer == "IV" ~ "IV - Inverness",
postcode_outer == "KA" ~ "KA - Kilmarnock",
postcode_outer == "NE" ~ "NE - Newcastle",
postcode_outer == "PA" ~ "PA - Paisley",
postcode_outer == "BT" ~ "BT - Belfast",
postcode_outer == "DG" ~ "DG - Dumfries",
postcode_outer == "BH" ~ "BH - Bournemouth",
postcode_outer == "CW" ~ "CW - Crewe",
postcode_outer == "HD" ~ "HD - Huddersfield",
postcode_outer == "HG" ~ "HG - Harrogate",
postcode_outer == "JE" ~ "JE - Jersey",
postcode_outer == "LA" ~ "LA - Lancaster",
postcode_outer == "LN" ~ "LN - Lincoln",
postcode_outer == "LS" ~ "LS - Leeds",
postcode_outer == "N" ~ "N - London",
postcode_outer == "TS" ~ "TS - Cleveland",
postcode_outer == "WD" ~ "WD - Watford",
postcode_outer == "YO" ~ "YO - York",
TRUE ~ postcode_outer)
) %>%
count(areas) %>%
rename(count = n) %>%
arrange(desc(count))
Sales broken down to postcode area, shows the main bulk has come from Stirling, Falkirk and Alloa areas. We would lose some if not all of these next bonfires night when Falkirk and Alloa look to restart there fireworks.
leaf_map %>%
separate(postcode, c("postcode_area", "postcode_inner"), " ") %>%
filter(postcode_area %in% c("FK7", "FK8", "FK9", "FK15")) %>%
count(postcode_area) %>%
rename(count = n) %>%
arrange(desc(count))
Filtering down to our normal table area of Dunblane, Bridge of Allan and Stirling area is 812 out of 1720 sales about 47%. We would have some over lap with other areas. We had about 1500 sales from the FK postcode areas about 82% of our sales that we could see some of them again.
leaf_map %>%
separate(postcode, c("postcode_area", "postcode_inner"), " ") %>%
filter(postcode_area %in% c("FK7", "FK8", "FK9", "FK15")) %>%
mutate(date = substr(purchased_on,
start = 6, stop = 10)) %>%
pivot_longer(c(4:9),
names_to = "ticket_type",
values_to = "ticket_count") %>%
mutate(
ticket_type = case_when(
ticket_type == "adult_ticket" ~ "Adult",
ticket_type == "child_ticket_age_5_16" ~ "Child",
ticket_type == "concession_ticket" ~ "Concession",
ticket_type == "family_discount_ticket_2_adults_and_2_children_5_16" ~ "Family Bundle",
ticket_type == "family_discount_ticket_2_adults_and_3_children_5_16" ~ "Family Bundle",
ticket_type == "student_bundle_4_x_students_with_id" ~ "Student Bundle",
TRUE ~ ticket_type)
) %>%
filter(!is.na(ticket_count)) %>%
select( ticket_count, ticket_type) %>%
group_by( ticket_type) %>%
summarise(ticket_count = sum(ticket_count), .groups = 'drop') %>%
ggplot(aes(y = ticket_type, x = ticket_count, fill = ticket_type)) +
geom_col(colour = "black") +
scale_fill_brewer(palette = "Reds") +
scale_x_continuous(breaks = c(0, 100, 200, 300, 400, 500, 600,
700, 800, 900, 1000, 1100, 1200)) +
labs(title = "Dunblane, Bridge of Allan and Stirling Ticket Sales",
y = "Ticket Type",
x = "Tickets Sold") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(legend.position = "none")
`%!in%` <- negate(`%in%`)
leaf_map %>%
separate(postcode, c("postcode_area", "postcode_inner"), " ") %>%
filter(postcode_area %!in% c("FK7", "FK8", "FK9", "FK15")) %>%
mutate(date = substr(purchased_on,
start = 6, stop = 10)) %>%
pivot_longer(c(4:9),
names_to = "ticket_type",
values_to = "ticket_count") %>%
mutate(
ticket_type = case_when(
ticket_type == "adult_ticket" ~ "Adult",
ticket_type == "child_ticket_age_5_16" ~ "Child",
ticket_type == "concession_ticket" ~ "Concession",
ticket_type == "family_discount_ticket_2_adults_and_2_children_5_16" ~ "Family Bundle",
ticket_type == "family_discount_ticket_2_adults_and_3_children_5_16" ~ "Family Bundle",
ticket_type == "student_bundle_4_x_students_with_id" ~ "Student Bundle",
TRUE ~ ticket_type)
) %>%
filter(!is.na(ticket_count)) %>%
select( ticket_count, ticket_type) %>%
group_by( ticket_type) %>%
summarise(ticket_count = sum(ticket_count), .groups = 'drop') %>%
ggplot(aes(y = ticket_type, x = ticket_count, fill = ticket_type)) +
geom_col(colour = "black") +
scale_fill_brewer(palette = "Reds") +
scale_x_continuous(breaks = c(0, 100, 200, 300, 400, 500, 600,
700, 800, 900, 1000, 1100, 1200)) +
labs(title = "Other Areas Ticket Sales",
y = "Ticket Type",
x = "Tickets Sold") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(legend.position = "none")
Both our normal areas and other areas are similar in the spread of the types of tickets.
leaflet(data = leaf_map) %>%
addTiles() %>%
addMarkers(label = ~ postcode,
popup = ~ postcode,
clusterOptions = markerClusterOptions())
The map shows we had a large number of people from outside our normal area, as mentioned some are students, people vising COP26 or tourists.
event1356 %>%
filter(!str_detect(organisation, "^(?i)na"),
!str_detect(organisation, "Road"),
!str_detect(organisation, "6"),
!str_detect(organisation, "kennedy"),
!str_detect(organisation, "Douglas"),
!str_detect(organisation, "Miss"),
!str_detect(organisation, "Select"),
!str_detect(organisation, "none")
) %>%
group_by(organisation) %>%
count(organisation) %>%
rename(count = n) %>%
arrange(desc(count))
35 people from 28 organisation fill this part in, with 1677 not using it so would be worth taking this field out next year.
Removed from organisation:
event1356 %>%
select(email) %>%
mutate(email = str_replace(email,
pattern = "^[a-zA-Z0-9\\._%-]+@",
replacement = ""),
email = str_to_lower(email)) %>%
group_by(email) %>%
count(email) %>%
arrange(desc(n))
84 unique email domain. Coming from 48 providers or industry’s.
event1356 %>%
select(email) %>%
mutate(email = str_replace(email,
pattern = "^[a-zA-Z0-9\\._%-]+@",
replacement = ""),
email = str_to_lower(email),
email = str_replace_all(email,
pattern = "[a-z\\._%-]+.ac.uk",
replacement = "university"),
email = str_replace_all(email,
pattern = "[a-z]+.edu",
replacement = "education"),
email = str_replace_all(email,
pattern = "[a-z]+school[a-z]?",
replacement = "education"),
email = str_replace_all(email,
pattern = "[a-z]+academy[a-z\\.]?",
replacement = "education"),
email = str_replace_all(email,
pattern =
"[a-z]+.org",
replacement = "nonprofit"),
email = str_extract(email,
pattern ="^[a-zA-Z0-9_%-]+"),
email = case_when(email == "hotmail" ~ "microsoft",
email == "themcnaughts" ~ "microsoft",
email == "outlook" ~ "microsoft",
email == "live" ~ "microsoft",
email == "msn" ~ "microsoft",
email == "passport" ~ "microsoft",
email == "gmail" ~ "google",
email == "google" ~ "google",
email == "googlemail" ~ "google",
email == "icloud" ~ "apple",
email == "me" ~ "apple",
email == "mac" ~ "apple",
email == "love" ~ "aol",
email == "ygm" ~ "aol",
email == "games" ~ "aol",
email == "wow" ~ "aol",
email == "ymail" ~ "yahoo",
email == "cybg" ~ "virgin",
email == "blueyonder" ~ "virgin",
email == "virginmedia" ~ "virgin",
email == "btconnect" ~ "bt",
email == "btopenworld" ~ "bt",
email == "btinternet" ~ "bt",
email == "nonprofite" ~ "nonprofit",
TRUE ~ email
)
) %>%
group_by(email) %>%
count(email) %>%
arrange(desc(n))
3 Comments