Loading the data
library(tidyverse)
# Load the Instagram post export and keep only the columns used in this
# analysis.
#
# `id` and `ownerId` are read as text rather than guessed. Post ids are
# ~19-digit integers, beyond the 2^53 limit up to which a double can hold
# every integer exactly, so a guessed numeric column silently rounds them and
# distinct posts could end up sharing one id. Both columns are identifiers,
# not quantities, so they are turned into factors after loading.
posts <- read_csv(
  'insta_posts.csv',
  col_types = cols(
    id = col_character(),
    ownerId = col_character()
    # every other column keeps readr's default type guessing
  )
) %>%
  select(
    caption, id, ownerId, timestamp, type,
    commentsCount, likesCount, videoViewCount
  ) %>%
  mutate(
    id = as.factor(id),
    ownerId = as.factor(ownerId)
  )

# Per-column overview of the loaded data.
summary(posts)
## caption id ownerId
## Length:60 2709331294142331392: 1 11830955 :12
## Class :character 2709988030803336192: 1 26669533 :12
## Mode :character 2710520174450442752: 1 305701719 :12
## 2712841031885574144: 1 325734299 :12
## 2713610628619525120: 1 2094200507:12
## 2728067503614427136: 1
## (Other) :54
## timestamp type commentsCount
## Min. :2021-11-18 01:04:40.00 Length:60 Min. : 0.0
## 1st Qu.:2022-05-06 01:03:34.00 Class :character 1st Qu.: 24.5
## Median :2022-06-12 03:51:08.50 Mode :character Median : 4156.0
## Mean :2022-05-14 14:32:21.25 Mean : 7014.0
## 3rd Qu.:2022-06-21 05:43:24.75 3rd Qu.:10062.0
## Max. :2022-06-30 19:39:08.00 Max. :47466.0
##
## likesCount videoViewCount
## Min. : 302345 Min. : 1400768
## 1st Qu.: 729113 1st Qu.: 3129538
## Median :1704191 Median : 7542422
## Mean :1961473 Mean : 8688494
## 3rd Qu.:2670577 3rd Qu.:11477502
## Max. :6990391 Max. :21589039
## NA's :34
Stats over time
library(lubridate)
# Split each post's timestamp into three calendar components (month, weekday
# and hour) and store them as new columns on `posts`.
posts <- mutate(
  posts,
  month = month(timestamp),
  weekday = wday(timestamp),
  hour = hour(timestamp)
)
By hour of the day
# Reshape `posts` into long form in two steps. First the three time
# components are stacked into time_type / time_value, then the three
# engagement counts are stacked into engagement_type / engagement_value.
# The result has one row per post x time component x engagement metric.
posts_long <- pivot_longer(
  posts,
  cols = c(month, weekday, hour),
  names_to = 'time_type',
  values_to = 'time_value'
)
posts_long <- pivot_longer(
  posts_long,
  cols = c(commentsCount, likesCount, videoViewCount),
  names_to = 'engagement_type',
  values_to = 'engagement_value'
)

# Stacked bars of comments and likes per hour, coloured by post type, with
# one facet row per metric. Video views are left out of this chart.
posts_long %>%
  filter(
    time_type == 'hour',
    engagement_type != 'videoViewCount'
  ) %>%
  rename(hour_of_day = time_value) %>%
  ggplot(aes(x = hour_of_day, y = engagement_value, fill = type)) +
  geom_col() +
  scale_fill_brewer(palette = "Accent") +
  # each metric gets its own y scale
  facet_grid(rows = vars(engagement_type), scales = "free_y") +
  ggtitle('Engagement by hour of the day')

By day of the week
# Same chart as the hourly one, but keyed on the numeric weekday that wday()
# produced upstream: stacked bars of comments and likes per day of the week,
# coloured by post type, one facet row per metric. Video views are left out.
posts_long %>%
  filter(
    time_type == 'weekday',
    engagement_type != 'videoViewCount'
  ) %>%
  rename(day_of_week = time_value) %>%
  ggplot(aes(x = day_of_week, y = engagement_value, fill = type)) +
  geom_col() +
  scale_fill_brewer(palette = "Accent") +
  # each metric gets its own y scale
  facet_grid(rows = vars(engagement_type), scales = "free_y") +
  ggtitle('Engagement by day of the week')

Engagement types
library(scatterPlotMatrix)
# Rebuild a wide table with one row per post and one column per engagement
# metric, then show the metrics against each other in an interactive scatter
# plot matrix.
posts_long %>%
  # drop missing engagement values before reshaping
  filter(!is.na(engagement_value)) %>%
  # posts_long repeats every value once per time_type; keeping only these
  # three columns and de-duplicating collapses those repeats
  distinct(id, engagement_type, engagement_value) %>%
  pivot_wider(
    names_from = engagement_type,
    values_from = engagement_value
  ) %>%
  # the id was only needed to line the metrics up per post
  select(-id) %>%
  scatterPlotMatrix(controlWidgets = TRUE)
Engagement by userID
# Total engagement per account: every non-missing engagement value in
# posts_long (comments, likes and video views) summed per ownerId.
#
# posts_long repeats each engagement value once per time_type (month,
# weekday, hour). Those rows differ in the time columns, so distinct() on the
# full table removes nothing and each value would be summed three times.
# The time columns are therefore dropped before de-duplicating. `id` and
# `engagement_type` are kept so that equal counts on different posts or
# metrics stay as separate rows.
by_user <- posts_long %>%
  select(id, ownerId, engagement_type, engagement_value) %>%
  distinct() %>%
  filter(!is.na(engagement_value)) %>%
  group_by(ownerId) %>%
  summarise(total_engagement = sum(engagement_value)) %>%
  arrange(total_engagement)

# order the userIds by engagement: the rows are already sorted ascending, so
# using them as the factor levels makes the bars appear in that order
by_user$ownerId <- factor(by_user$ownerId, levels = by_user$ownerId)

by_user %>%
  ggplot(aes(x = ownerId, y = total_engagement)) +
  geom_bar(stat = "identity")
