library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(gghighlight)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(stringr)
library(lubridate)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
sessionInfo()
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8 
## [2] LC_CTYPE=English_United Kingdom.utf8   
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.utf8    
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0  knitr_1.45        psych_2.4.1       gghighlight_0.4.1
##  [5] lubridate_1.9.3   forcats_1.0.0     stringr_1.5.1     dplyr_1.1.4      
##  [9] purrr_1.0.2       readr_2.1.5       tidyr_1.3.1       tibble_3.2.1     
## [13] ggplot2_3.4.4     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.8        utf8_1.2.4        generics_0.1.3    xml2_1.3.6       
##  [5] stringi_1.8.3     lattice_0.21-9    hms_1.1.3         digest_0.6.34    
##  [9] magrittr_2.0.3    evaluate_0.23     grid_4.3.2        timechange_0.3.0 
## [13] fastmap_1.1.1     jsonlite_1.8.8    fansi_1.0.6       viridisLite_0.4.2
## [17] scales_1.3.0      jquerylib_0.1.4   mnormt_2.1.1      cli_3.6.2        
## [21] rlang_1.1.3       munsell_0.5.0     withr_3.0.0       cachem_1.0.8     
## [25] yaml_2.3.8        tools_4.3.2       parallel_4.3.2    tzdb_0.4.0       
## [29] colorspace_2.1-0  vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4  
## [33] pkgconfig_2.0.3   pillar_1.9.0      bslib_0.6.1       gtable_0.3.4     
## [37] glue_1.7.0        systemfonts_1.0.5 xfun_0.41         tidyselect_1.2.0 
## [41] rstudioapi_0.15.0 htmltools_0.5.7   nlme_3.1-163      rmarkdown_2.25   
## [45] svglite_2.1.3     compiler_4.3.2
sales <- read.csv("orders (6).csv")
tickets <- read.csv("tickets.csv")

Top 10 cities by number of attendees

sales$Shipping.city <- tolower(sales$Shipping.city)

cities <- sales %>%
  select(Number,
         City = Shipping.city) %>%
  mutate_if(is.character,str_trim) %>%
  arrange(City) %>%
  group_by(City) %>%
  count()
colnames(cities)[2] <- "Total"

top10_cities <- cities[order(desc(cities$Total)),] 
top10_cities$City_Se <- str_to_sentence(top10_cities$City)
top10_cities <- as.data.frame(head(top10_cities, 10)) %>%
  select(City = City_Se,
         Total)

ggplot() +
  geom_col(top10_cities,
           mapping = aes(x = reorder(City, -Total),
                         y = Total),
           fill = "black") +
  labs(x = "City",
       y = "Tickets Sold",
       title = "Top 10 attendee origen") +
  scale_y_continuous(expand = c(0,0),
                     limits = c(0, 45)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

Timeline of ticket sales

Timeline <- sales %>%
  select(Date) %>%
  group_by(Date) %>%
  count() 
colnames(Timeline)[2] <- "Total"
Timeline$date <- dmy(Timeline$Date)

highlight_max <- Timeline[which.max(Timeline$Total),]
highlight_max$xend <- as.Date("2023-11-20")
highlight_max$xend1 <- as.Date("2023-11-15")
highlight_max$yend <- 44
highlight_max$label <- "49 tickets sold on 17/11/23"

second_max <- arrange(Timeline, desc(Total))
second_max <- second_max[2,]
second_max$xend <- as.Date("2023-11-19")
second_max$xend1 <- as.Date("2023-11-26")
second_max$yend <- 15
second_max$label <- "23 tickets sold on 19/11/23"

ordered <- arrange(Timeline, desc(Total))
top10_days <- head(ordered, 10)

ggplot() +
  geom_line(Timeline,
            mapping = aes(x = date,
                          y = Total)) +
  geom_point(top10_days,
             mapping = (aes(x = date,
                            y = Total,
                            color = "Top 10 Days")),
             color = "black",
             size = 3) +
  geom_segment(highlight_max,
               mapping = aes(xend = xend, 
                             yend = yend,
                             x = highlight_max$date,
                             y = highlight_max$Total),
               colour = "green") +
  geom_label(aes(label = highlight_max$label,
                 x = highlight_max$xend1,
                 y = highlight_max$yend),
             hjust = 0, 
             vjust = 1, 
             fill = "green", 
             colour = "black", 
             alpha= 1) +
  geom_segment(second_max,
               mapping = aes(xend = xend1, 
                             yend = yend,
                             x = date,
                             y = Total),
               colour = "red") +
  geom_label(aes(label = second_max$label,
                 x = second_max$xend1,
                 y = second_max$yend),
             hjust = 0, 
             vjust = 1, 
             fill = "red", 
             colour = "black", 
             alpha= 1) +
  labs(title = "Timeline of Sales",
       x = "Date",
       y = "Total Sales") +
  scale_x_date(date_breaks = "1 month",
               minor_breaks = "1 day",
               date_labels =  "%b %Y",
               limits = c(as.Date("2023-11-15"), NA)) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_cartesian(expand = FALSE,
                  ylim = c(0, 50)) +
  theme_bw()

top_10_days <- top10_days %>%
  select(Total) %>%
  kbl() %>%
  kable_paper(full_width = F,
                position = "center") 
## Adding missing grouping variables: `Date`
top_10_days
Date Total
17/11/2023 49
19/11/2023 23
18/11/2023 18
16/01/2024 12
21/01/2024 10
26/01/2024 10
05/02/2024 8
17/01/2024 8
25/01/2024 8
18/01/2024 7

On the day Attendance and Drop

tickets1 <- tickets %>%
  select(Bought = Item.count,
         Attended = Attended) %>%
  summarise(sum(Bought),
            sum(Attended))
colnames(tickets1)[1] <- "Bought"
colnames(tickets1)[2] <- "Attended"
tickets1$AvgAttend <- round((tickets1$Attended/tickets1$Bought *100),2)
tickets1$Drop <- (100 - tickets1$AvgAttend)

Intake <- tickets1 %>%
  select(Bought,
         Attended,
         Percentage_Atteded = AvgAttend,
         Drop_Percentage = Drop) %>%
  kbl() %>%
  kable_paper(full_width = F,
              position = "center") 
  
Intake
Bought Attended Percentage_Atteded Drop_Percentage
414 369 89.13 10.87