library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(gghighlight)
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(stringr)
library(lubridate)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
sessionInfo()
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8
## [2] LC_CTYPE=English_United Kingdom.utf8
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.utf8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.4.0 knitr_1.45 psych_2.4.1 gghighlight_0.4.1
## [5] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [9] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [13] ggplot2_3.4.4 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.8 utf8_1.2.4 generics_0.1.3 xml2_1.3.6
## [5] stringi_1.8.3 lattice_0.21-9 hms_1.1.3 digest_0.6.34
## [9] magrittr_2.0.3 evaluate_0.23 grid_4.3.2 timechange_0.3.0
## [13] fastmap_1.1.1 jsonlite_1.8.8 fansi_1.0.6 viridisLite_0.4.2
## [17] scales_1.3.0 jquerylib_0.1.4 mnormt_2.1.1 cli_3.6.2
## [21] rlang_1.1.3 munsell_0.5.0 withr_3.0.0 cachem_1.0.8
## [25] yaml_2.3.8 tools_4.3.2 parallel_4.3.2 tzdb_0.4.0
## [29] colorspace_2.1-0 vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4
## [33] pkgconfig_2.0.3 pillar_1.9.0 bslib_0.6.1 gtable_0.3.4
## [37] glue_1.7.0 systemfonts_1.0.5 xfun_0.41 tidyselect_1.2.0
## [41] rstudioapi_0.15.0 htmltools_0.5.7 nlme_3.1-163 rmarkdown_2.25
## [45] svglite_2.1.3 compiler_4.3.2
sales <- read.csv("orders (6).csv")
tickets <- read.csv("tickets.csv")
Top 10 cities by number of attendees
sales$Shipping.city <- tolower(sales$Shipping.city)
cities <- sales %>%
select(Number,
City = Shipping.city) %>%
mutate_if(is.character,str_trim) %>%
arrange(City) %>%
group_by(City) %>%
count()
colnames(cities)[2] <- "Total"
top10_cities <- cities[order(desc(cities$Total)),]
top10_cities$City_Se <- str_to_sentence(top10_cities$City)
top10_cities <- as.data.frame(head(top10_cities, 10)) %>%
select(City = City_Se,
Total)
ggplot() +
geom_col(top10_cities,
mapping = aes(x = reorder(City, -Total),
y = Total),
fill = "black") +
labs(x = "City",
y = "Tickets Sold",
title = "Top 10 attendee origen") +
scale_y_continuous(expand = c(0,0),
limits = c(0, 45)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

Timeline of ticket sales
Timeline <- sales %>%
select(Date) %>%
group_by(Date) %>%
count()
colnames(Timeline)[2] <- "Total"
Timeline$date <- dmy(Timeline$Date)
highlight_max <- Timeline[which.max(Timeline$Total),]
highlight_max$xend <- as.Date("2023-11-20")
highlight_max$xend1 <- as.Date("2023-11-15")
highlight_max$yend <- 44
highlight_max$label <- "49 tickets sold on 17/11/23"
second_max <- arrange(Timeline, desc(Total))
second_max <- second_max[2,]
second_max$xend <- as.Date("2023-11-19")
second_max$xend1 <- as.Date("2023-11-26")
second_max$yend <- 15
second_max$label <- "23 tickets sold on 19/11/23"
ordered <- arrange(Timeline, desc(Total))
top10_days <- head(ordered, 10)
ggplot() +
geom_line(Timeline,
mapping = aes(x = date,
y = Total)) +
geom_point(top10_days,
mapping = (aes(x = date,
y = Total,
color = "Top 10 Days")),
color = "black",
size = 3) +
geom_segment(highlight_max,
mapping = aes(xend = xend,
yend = yend,
x = highlight_max$date,
y = highlight_max$Total),
colour = "green") +
geom_label(aes(label = highlight_max$label,
x = highlight_max$xend1,
y = highlight_max$yend),
hjust = 0,
vjust = 1,
fill = "green",
colour = "black",
alpha= 1) +
geom_segment(second_max,
mapping = aes(xend = xend1,
yend = yend,
x = date,
y = Total),
colour = "red") +
geom_label(aes(label = second_max$label,
x = second_max$xend1,
y = second_max$yend),
hjust = 0,
vjust = 1,
fill = "red",
colour = "black",
alpha= 1) +
labs(title = "Timeline of Sales",
x = "Date",
y = "Total Sales") +
scale_x_date(date_breaks = "1 month",
minor_breaks = "1 day",
date_labels = "%b %Y",
limits = c(as.Date("2023-11-15"), NA)) +
scale_y_continuous(expand = c(0, 0)) +
coord_cartesian(expand = FALSE,
ylim = c(0, 50)) +
theme_bw()

top_10_days <- top10_days %>%
select(Total) %>%
kbl() %>%
kable_paper(full_width = F,
position = "center")
## Adding missing grouping variables: `Date`
top_10_days
Date
|
Total
|
17/11/2023
|
49
|
19/11/2023
|
23
|
18/11/2023
|
18
|
16/01/2024
|
12
|
21/01/2024
|
10
|
26/01/2024
|
10
|
05/02/2024
|
8
|
17/01/2024
|
8
|
25/01/2024
|
8
|
18/01/2024
|
7
|
On the day Attendance and Drop
tickets1 <- tickets %>%
select(Bought = Item.count,
Attended = Attended) %>%
summarise(sum(Bought),
sum(Attended))
colnames(tickets1)[1] <- "Bought"
colnames(tickets1)[2] <- "Attended"
tickets1$AvgAttend <- round((tickets1$Attended/tickets1$Bought *100),2)
tickets1$Drop <- (100 - tickets1$AvgAttend)
Intake <- tickets1 %>%
select(Bought,
Attended,
Percentage_Atteded = AvgAttend,
Drop_Percentage = Drop) %>%
kbl() %>%
kable_paper(full_width = F,
position = "center")
Intake
Bought
|
Attended
|
Percentage_Atteded
|
Drop_Percentage
|
414
|
369
|
89.13
|
10.87
|