library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# --- 1. Load the Stories CSV ---
# Read the export with a latin1 locale (avoids garbled characters) and no
# column-type message, then clean the column names in the same pipeline.
stories <- read_csv(
  file = "~/Downloads/Detailed_Story_Performance_01-11-2025_to_15-11-2025.CSV",
  locale = locale(encoding = "latin1"),
  show_col_types = FALSE
) %>%
  clean_names()
# --- 2. Parse the time posted column ---
# Turn time_posted into a date-time, accepting either of the two listed
# order strings, then derive the posting hour and the labelled weekday.
stories <- mutate(
  stories,
  time_posted = parse_date_time(
    time_posted,
    orders = c("b d, Y I:M:S p", "b d, y I:M:S p")
  ),
  hour = hour(time_posted),
  wday = wday(time_posted, label = TRUE)
)
# --- 3. Compute engagement rate ---
# engagement_rate is the per-story sum of replies, back and forward;
# change the formula on the next line to use a different definition.
stories <- mutate(stories, engagement_rate = replies + back + forward)
# --- 4. Engagement by hour (Peak Posting Window) ---
# One row per posting hour: mean views, reach and engagement plus the number
# of stories, ranked from highest to lowest average engagement.
eng_hour <- stories %>%
  summarise(
    avg_views = mean(views, na.rm = TRUE),
    avg_reach = mean(reach, na.rm = TRUE),
    avg_engagement = mean(engagement_rate, na.rm = TRUE),
    n = n(),
    .by = hour
  ) %>%
  # hour as tie-breaker reproduces the row order of a grouped summary
  arrange(desc(avg_engagement), hour)
print(eng_hour)
## # A tibble: 12 × 5
## hour avg_views avg_reach avg_engagement n
## <int> <dbl> <dbl> <dbl> <int>
## 1 15 4259. 3224. 2870 3
## 2 1 4335. 3283. 2833. 3
## 3 23 3810. 2885 2499. 12
## 4 22 3256. 2450. 2302. 7
## 5 13 2472 1888. 2012. 2
## 6 12 2357 1689. 1738. 3
## 7 14 2266 1803 1537 1
## 8 21 2250. 1789. 1411 4
## 9 11 2274 1767 1408. 2
## 10 18 1952 1560 1203 1
## 11 20 1647 1300 1142 3
## 12 16 1918 1624 881 1
# --- 5. Plot: How engagement varies by time of day ---
# Blue line with point markers: mean engagement at each posting hour.
ggplot(data = eng_hour, mapping = aes(x = hour, y = avg_engagement)) +
  geom_line(colour = "blue") +
  geom_point() +
  ggtitle("Average Story Engagement by Posting Hour") +
  xlab("Hour of Day") +
  ylab("Avg Engagement") +
  theme_minimal()

# --- 6. Engagement by Day of Week ---
# One row per weekday: mean engagement, mean views and story count,
# listed in weekday (factor level) order.
eng_day <- stories %>%
  summarise(
    avg_eng = mean(engagement_rate, na.rm = TRUE),
    avg_views = mean(views, na.rm = TRUE),
    n = n(),
    .by = wday
  ) %>%
  arrange(wday)
print(eng_day)
## # A tibble: 6 × 4
## wday avg_eng avg_views n
## <ord> <dbl> <dbl> <int>
## 1 Sun 2887 4433. 3
## 2 Mon 1050. 1627 6
## 3 Wed 2295. 3518. 9
## 4 Thu 2665. 4005. 7
## 5 Fri 1824. 2696. 7
## 6 Sat 2091. 2930. 10
# Bar chart of mean engagement per weekday.
# drop = FALSE keeps every weekday level on the x-axis, so a day with no
# stories appears as an empty slot instead of silently vanishing from the
# chart (the default discrete scale drops unused factor levels).
ggplot(eng_day, aes(wday, avg_eng)) +
  geom_col(fill = "steelblue") +
  scale_x_discrete(drop = FALSE) +
  labs(
    title = "Average Engagement Rate by Day of Week",
    x = "Day",
    y = "Avg Engagement"
  ) +
  theme_minimal()

# --- 7. Statistical test: Do posting times predict engagement? ---
# Ordinary least squares fit of engagement_rate on posting hour and weekday.
# hour enters as a single numeric (linear) term.
# wday was built with label = TRUE and prints as <ord>, i.e. an ordered
# factor, so lm() codes it with polynomial contrasts: the wday.L / wday.Q /
# wday.C / wday^4 / wday^5 rows are trend components across the observed
# days, not per-day effects against a baseline day.
# NOTE(review): if per-day effects are wanted, consider
# factor(wday, ordered = FALSE) in the formula; the overall fit (R-squared,
# F-statistic) should be unaffected by that recoding - confirm before relying
# on individual wday coefficients.
model <- lm(engagement_rate ~ hour + wday, data = stories)
# Prints the coefficient table, residual summary, R-squared and overall F-test.
summary(model)
##
## Call:
## lm(formula = engagement_rate ~ hour + wday, data = stories)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1448.61 -231.33 6.24 171.70 1153.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2276.274 301.949 7.539 7.82e-09 ***
## hour -7.432 15.317 -0.485 0.630556
## wday.L -186.037 231.790 -0.803 0.427616
## wday.Q 233.436 212.296 1.100 0.279022
## wday.C -860.238 227.376 -3.783 0.000581 ***
## wday^4 1163.314 199.633 5.827 1.30e-06 ***
## wday^5 -42.806 185.893 -0.230 0.819219
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 498.6 on 35 degrees of freedom
## Multiple R-squared: 0.5724, Adjusted R-squared: 0.4991
## F-statistic: 7.808 on 6 and 35 DF, p-value: 2.238e-05