# Global knitr chunk options for the rendered report.
#
# NOTE: the former `rm(list = ls())` call was removed on purpose. It only
# deletes objects from the global environment (attached packages and
# options are left untouched), so it never produced a truly clean session,
# and it silently wipes the caller's workspace when this code is run
# interactively. Restart R when a clean slate is needed.
knitr::opts_chunk$set(
  message = FALSE,       # do not show messages in the output
  warning = FALSE,       # do not show warnings in the output
  fig.align = "center",  # centre figures
  comment = "#>"         # prefix for printed results
)

# Data Preparation

library(dplyr)
library(ggplot2)
library(scales)
library(glue)
library(plotly)
library(lubridate)

# Load the trending-videos data set from the project's data_input folder.
youtube <- read.csv(
  file = "data_input/youtubetrends_2023.csv",
  header = TRUE
)

# Engagement summary: mean likes and mean comment count for each value of
# `publish_wday`, ignoring missing values. One row per publish weekday.
yt_summary <- youtube %>%
  group_by(publish_wday) %>%
  summarise(
    avg_likes = mean(likes, na.rm = TRUE),
    avg_comments = mean(comment_count, na.rm = TRUE),
    .groups = "drop"
  )

# Disabled-features summary: percentage of videos whose comments / ratings
# are disabled, returned as a one-row table with two columns.
#
# `mean(flag, na.rm = TRUE) * 100` is used instead of `sum(flag) / n() * 100`
# so that a single missing flag no longer turns the whole percentage into
# NA; this matches the `na.rm = TRUE` handling used for `yt_summary`.
# Assumes both flag columns are logical (or 0/1) -- TODO confirm against the
# CSV.
# NOTE(review): `Coments_Disabled` is misspelled; the name is kept unchanged
# so any code that already refers to this column keeps working.
yt_disabled <- youtube %>%
  summarise(
    Coments_Disabled = mean(comments_disabled, na.rm = TRUE) * 100,
    Ratings_Disabled = mean(ratings_disabled, na.rm = TRUE) * 100
  )

# 📊 Plot 1: Time Taken to Trend vs. Publish Time

# Scatter plot of `timetotrend` against `publish_hour` (semi-transparent
# points), overlaid with a red linear-model fit line and no confidence band.
ggplot(data = youtube, mapping = aes(x = publish_hour, y = timetotrend)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Time Taken to Trend vs. Publish Hour",
    x = "Publish Hour",
    y = "Time to Trend (Days)"
  ) +
  theme_minimal()

# 📊 Plot 2: Engagement Metrics by Publish Day

# Bar chart of average likes per publish day; the days on the x axis are
# ordered by their average likes via `reorder()`.
ggplot(
  data = yt_summary,
  mapping = aes(x = reorder(publish_wday, avg_likes), y = avg_likes)
) +
  geom_col(fill = "blue") +
  labs(
    title = "Average Likes by Publish Day",
    x = "Publish Day",
    y = "Average Likes"
  ) +
  theme_minimal()

# 📊 Plot 3: Disabled Features Analysis

# Base-graphics bar chart of the two percentages stored in `yt_disabled`,
# drawn side by side with a red/blue palette and explicit bar labels.
barplot(
  height = as.matrix(yt_disabled),
  names.arg = c("Comments Disabled", "Ratings Disabled"),
  beside = TRUE,
  col = c("red", "blue"),
  ylab = "Percentage",
  main = "Percentage of Videos with Disabled Features"
)