library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# --- 1. Load the Stories CSV ---
# Read the export as Latin-1 (this avoids garbled characters), then pass it
# straight through janitor's clean_names() so later steps can refer to
# columns by tidy names such as `time_posted`.
stories <- read_csv(
  file = "~/Downloads/Detailed_Story_Performance_01-11-2025_to_15-11-2025.CSV",
  locale = locale(encoding = "latin1"),
  show_col_types = FALSE
) %>%
  clean_names()

# --- 2. Parse the time posted column ---
# Step 1: turn the raw text into a date-time. Two layouts are tried, one with
# a four-digit year (Y) and one with a two-digit year (y); both use a 12-hour
# clock with an AM/PM marker.
# Step 2: derive the posting hour and the labelled weekday from the parsed
# value. With label = TRUE, wday() yields an ordered factor.
stories <- stories %>%
  mutate(
    time_posted = parse_date_time(
      time_posted,
      orders = c("b d, Y I:M:S p", "b d, y I:M:S p")
    )
  ) %>%
  mutate(
    hour = lubridate::hour(time_posted),
    wday = lubridate::wday(time_posted, label = TRUE)
  )

# --- 3. Compute engagement rate ---
# NOTE(review): despite the column name, this is the plain sum of the three
# interaction columns (replies, back, forward), not a ratio. It is not
# normalised by views or reach. Change the definition here if a different
# engagement measure is wanted.
stories <- mutate(stories, engagement_rate = replies + back + forward)

# --- 4. Engagement by hour (Peak Posting Window) ---
# One row per posting hour: mean views, reach and engagement, plus the number
# of stories behind each mean (check `n` before trusting a high average).
eng_hour <- stories %>%
  group_by(hour) %>%
  summarise(
    avg_views = mean(views, na.rm = TRUE),
    avg_reach = mean(reach, na.rm = TRUE),
    avg_engagement = mean(engagement_rate, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )

# Rank the hours from highest to lowest average engagement
eng_hour <- arrange(eng_hour, desc(avg_engagement))

print(eng_hour)
## # A tibble: 12 × 5
##     hour avg_views avg_reach avg_engagement     n
##    <int>     <dbl>     <dbl>          <dbl> <int>
##  1    15     4259.     3224.          2870      3
##  2     1     4335.     3283.          2833.     3
##  3    23     3810.     2885           2499.    12
##  4    22     3256.     2450.          2302.     7
##  5    13     2472      1888.          2012.     2
##  6    12     2357      1689.          1738.     3
##  7    14     2266      1803           1537      1
##  8    21     2250.     1789.          1411      4
##  9    11     2274      1767           1408.     2
## 10    18     1952      1560           1203      1
## 11    20     1647      1300           1142      3
## 12    16     1918      1624            881      1
# --- 5. Plot: How engagement varies by time of day ---
# Blue line through the hourly averages, with a point marker at every hour
# that has at least one story.
ggplot(data = eng_hour, mapping = aes(x = hour, y = avg_engagement)) +
  geom_line(colour = "blue") +
  geom_point() +
  ggtitle("Average Story Engagement by Posting Hour") +
  xlab("Hour of Day") +
  ylab("Avg Engagement") +
  theme_minimal()

# --- 6. Engagement by Day of Week ---
# One row per weekday that appears in the data: mean engagement, mean views
# and the number of stories posted on that day.
eng_day <- summarise(
  group_by(stories, wday),
  avg_eng = mean(engagement_rate, na.rm = TRUE),
  avg_views = mean(views, na.rm = TRUE),
  n = n(),
  .groups = "drop"
)

print(eng_day)
## # A tibble: 6 × 4
##   wday  avg_eng avg_views     n
##   <ord>   <dbl>     <dbl> <int>
## 1 Sun     2887      4433.     3
## 2 Mon     1050.     1627      6
## 3 Wed     2295.     3518.     9
## 4 Thu     2665.     4005.     7
## 5 Fri     1824.     2696.     7
## 6 Sat     2091.     2930.    10
# Bar chart of the per-weekday averages computed in eng_day
ggplot(data = eng_day, mapping = aes(x = wday, y = avg_eng)) +
  geom_col(fill = "steelblue") +
  ggtitle("Average Engagement Rate by Day of Week") +
  xlab("Day") +
  ylab("Avg Engagement") +
  theme_minimal()

# --- 7. Statistical test: Do posting times predict engagement? ---
# `wday` was created with wday(..., label = TRUE), which returns an *ordered*
# factor. lm() encodes ordered factors with polynomial contrasts (wday.L,
# wday.Q, wday.C, ...), and those terms have no sensible reading for days of
# the week. Converting to an unordered factor gives the default treatment
# contrasts instead: one coefficient per day, each measured against the first
# day present in the data.
#
# This is only a re-expression of the same model: fitted values, residuals,
# R-squared, the F-statistic and the `hour` coefficient are identical under
# either coding. Only the day terms and the intercept change.
#
# The conversion is done on a copy so `stories$wday` stays ordered for the
# tables and plots that rely on weekday order.
model_data <- stories %>%
  mutate(wday = factor(wday, ordered = FALSE))

model <- lm(engagement_rate ~ hour + wday, data = model_data)
summary(model)
## 
## Call:
## lm(formula = engagement_rate ~ hour + wday, data = stories)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1448.61  -231.33     6.24   171.70  1153.48 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2276.274    301.949   7.539 7.82e-09 ***
## hour          -7.432     15.317  -0.485 0.630556    
## wday.L      -186.037    231.790  -0.803 0.427616    
## wday.Q       233.436    212.296   1.100 0.279022    
## wday.C      -860.238    227.376  -3.783 0.000581 ***
## wday^4      1163.314    199.633   5.827 1.30e-06 ***
## wday^5       -42.806    185.893  -0.230 0.819219    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 498.6 on 35 degrees of freedom
## Multiple R-squared:  0.5724, Adjusted R-squared:  0.4991 
## F-statistic: 7.808 on 6 and 35 DF,  p-value: 2.238e-05