Loading the data

library(tidyverse)
# Load the scraped Instagram posts and keep only the columns analysed below.
#
# `id` is read as text on purpose: the post IDs are 19-digit integers, which
# is beyond the 2^53 range a double can represent exactly. Letting readr
# guess a numeric type silently rounds them, so the stored IDs no longer
# match the real ones and two different posts could end up sharing an ID.
# All other columns are still type-guessed by readr.
posts <- read_csv(
  "insta_posts.csv",
  col_types = cols(id = col_character())
) %>%
  select(
    caption, id, ownerId, timestamp, type, commentsCount, likesCount, videoViewCount
  ) %>%
  mutate(
    # identifiers are labels, not quantities, so treat them as factors
    id = as.factor(id),
    ownerId = as.factor(ownerId)
  )
summary(posts)
##    caption                            id           ownerId  
##  Length:60          2709331294142331392: 1   11830955  :12  
##  Class :character   2709988030803336192: 1   26669533  :12  
##  Mode  :character   2710520174450442752: 1   305701719 :12  
##                     2712841031885574144: 1   325734299 :12  
##                     2713610628619525120: 1   2094200507:12  
##                     2728067503614427136: 1                  
##                     (Other)            :54                  
##    timestamp                          type           commentsCount    
##  Min.   :2021-11-18 01:04:40.00   Length:60          Min.   :    0.0  
##  1st Qu.:2022-05-06 01:03:34.00   Class :character   1st Qu.:   24.5  
##  Median :2022-06-12 03:51:08.50   Mode  :character   Median : 4156.0  
##  Mean   :2022-05-14 14:32:21.25                      Mean   : 7014.0  
##  3rd Qu.:2022-06-21 05:43:24.75                      3rd Qu.:10062.0  
##  Max.   :2022-06-30 19:39:08.00                      Max.   :47466.0  
##                                                                       
##    likesCount      videoViewCount    
##  Min.   : 302345   Min.   : 1400768  
##  1st Qu.: 729113   1st Qu.: 3129538  
##  Median :1704191   Median : 7542422  
##  Mean   :1961473   Mean   : 8688494  
##  3rd Qu.:2670577   3rd Qu.:11477502  
##  Max.   :6990391   Max.   :21589039  
##                    NA's   :34

Stats over time

library(lubridate)
# Derive calendar components from each post's timestamp so that engagement
# can later be grouped by month, day of the week and hour of the day.
# wday() is called without `label`, so weekday is a number rather than a name.
posts <- mutate(
  posts,
  month = month(timestamp),
  weekday = wday(timestamp),
  hour = hour(timestamp)
)

By hour of the day

# Reshape to long form in two steps: first stack the three time components,
# then stack the three engagement counters. The result holds one row per
# (post, time_type, engagement_type) combination, i.e. 3 x 3 = 9 rows per
# post, so every engagement value is repeated once per time_type. Consumers
# must either filter to a single time_type or de-duplicate before summing.
posts_long <- pivot_longer(
  posts,
  cols = c("month", "weekday", "hour"),
  names_to = "time_type",
  values_to = "time_value"
) %>%
  pivot_longer(
    cols = c("commentsCount", "likesCount", "videoViewCount"),
    names_to = "engagement_type",
    values_to = "engagement_value"
  )
# Stacked bars of comments and likes per hour of the day, coloured by post
# type. Video views are left out (they are NA for some posts, per the summary).
# Each engagement type gets its own panel with an independent y axis because
# likes and comments differ by orders of magnitude.
posts_long %>%
  filter(
    time_type == "hour",
    engagement_type != "videoViewCount"
  ) %>%
  rename(hour_of_day = time_value) %>%
  ggplot(aes(x = hour_of_day, y = engagement_value, fill = type)) +
  geom_col() +
  scale_fill_brewer(palette = "Accent") +
  facet_grid(rows = vars(engagement_type), scales = "free_y") +
  labs(title = "Engagement by hour of the day")

By day of the week

# Stacked bars of comments and likes per day of the week, coloured by post
# type. Video views are left out; each engagement type is drawn in its own
# panel with an independent y axis.
posts_long %>%
  filter(
    time_type == "weekday",
    engagement_type != "videoViewCount"
  ) %>%
  rename(day_of_week = time_value) %>%
  ggplot(aes(x = day_of_week, y = engagement_value, fill = type)) +
  geom_col() +
  scale_fill_brewer(palette = "Accent") +
  facet_grid(rows = vars(engagement_type), scales = "free_y") +
  labs(title = "Engagement by day of the week")

Engagement types

library(scatterPlotMatrix)

# Rebuild one row per post with a column per engagement type, then show the
# pairwise relationships in an interactive scatter plot matrix.
# posts_long repeats each engagement value once per time_type, so the rows
# are reduced to unique (id, engagement_type, engagement_value) triples
# before widening; missing values (e.g. video views on non-video posts) are
# dropped first.
posts_long %>%
  filter(!is.na(engagement_value)) %>%
  distinct(id, engagement_type, engagement_value) %>%
  pivot_wider(
    names_from = "engagement_type",
    values_from = "engagement_value"
  ) %>%
  select(-id) %>%
  scatterPlotMatrix(controlWidgets = TRUE)

Engagement by user (ownerId)

# Total engagement per account, sorted from least to most engaged.
#
# posts_long holds every engagement value three times (once for each of the
# month / weekday / hour rows), and those copies differ in time_type and
# time_value, so a plain distinct() over all columns does not remove them and
# the sum comes out three times too large. De-duplicate on the columns that
# identify a single measurement instead, so each post's comments, likes and
# video views are counted exactly once.
#
# Note that total_engagement adds comments, likes and video views together;
# missing values (posts without a video view count) are dropped before summing.
by_user <- posts_long %>%
  distinct(id, ownerId, engagement_type, engagement_value) %>%
  filter(!is.na(engagement_value)) %>%
  group_by(ownerId) %>%
  summarise(total_engagement = sum(engagement_value)) %>%
  arrange(total_engagement)

# Re-level ownerId in the current row order (ascending total engagement) so
# the bars are drawn from the least to the most engaged account instead of
# in the factor's original level order.
by_user <- by_user %>%
  mutate(ownerId = factor(ownerId, levels = ownerId))

ggplot(by_user, aes(x = ownerId, y = total_engagement)) +
  geom_col()