Loading the data

library(tidyverse)
# Load the Instagram post export, keeping only the columns used in this
# analysis. The three "number*" counters are renamed to shorter names while
# selecting, and the two identifier columns (url, alias) become factors so
# that summary() tabulates them instead of reporting them as plain text.
# NOTE(review): the summary printed below reports a minimum date of
# 1970-01-01, which looks like a placeholder for missing timestamps - confirm.
posts <- read_csv('ilp_dataset_1.csv') %>%
  select(
    alias,
    posts = numberPosts,
    followers = numberFollowers,
    date, url, description, tags, multipleImage,
    likes = numberLikes
  ) %>%
  mutate(across(c(url, alias), as.factor))
summary(posts)
##                 alias           posts         followers       
##  _foodstories_     :   17   Min.   :   15   Min.   :  124965  
##  _hollyt           :   17   1st Qu.:  787   1st Qu.:  198039  
##  _ingo_1           :   17   Median : 1481   Median :  393843  
##  _mariannejacobsen_:   17   Mean   : 2316   Mean   :  997829  
##  _picolo           :   17   3rd Qu.: 2932   3rd Qu.:  935310  
##  _tinamaria        :   17   Max.   :27671   Max.   :22130730  
##  (Other)           :16437                                     
##       date                       
##  Min.   :1970-01-01 00:00:00.00  
##  1st Qu.:2017-04-10 05:00:00.00  
##  Median :2017-04-21 05:00:00.00  
##  Mean   :2017-04-04 08:50:26.45  
##  3rd Qu.:2017-04-27 05:00:00.00  
##  Max.   :2017-05-01 05:00:00.00  
##                                  
##                                                              url       
##  https://www.instagram.com/p/_umyXcP4B7/?taken-by=aaronsanimals:    1  
##  https://www.instagram.com/p/-CV8BmlCbJ/?taken-by=ibrahimgi    :    1  
##  https://www.instagram.com/p/-CWE8YFCbd/?taken-by=ibrahimgi    :    1  
##  https://www.instagram.com/p/-CWZyAFCcF/?taken-by=ibrahimgi    :    1  
##  https://www.instagram.com/p/-D6VgNFCZQ/?taken-by=ibrahimgi    :    1  
##  https://www.instagram.com/p/-D6YO8FCZX/?taken-by=ibrahimgi    :    1  
##  (Other)                                                       :16533  
##  description            tags           multipleImage       likes        
##  Length:16539       Length:16539       Mode :logical   Min.   :      0  
##  Class :character   Class :character   FALSE:16057     1st Qu.:   3232  
##  Mode  :character   Mode  :character   TRUE :482       Median :   7351  
##                                                        Mean   :  24414  
##                                                        3rd Qu.:  18357  
##                                                        Max.   :1115123  
## 

Distribution of stats

# Reshape the three per-post counters (followers, likes, posts) into long
# form: one row per (post, counter), with the counter's name in `stat_name`
# and its value in `stat_value`. All other columns are carried along.
posts_long <- posts %>%
  pivot_longer(
    c('followers', 'likes', 'posts'),
    names_to = 'stat_name', values_to = 'stat_value'
  )

# Histogram of each counter on a log10 x-axis, one stacked panel per counter.
posts_long %>%
  # log10(0) is -Inf, which the log scale drops with a warning (likes has a
  # minimum of 0 in the summary output); remove zeros explicitly instead.
  filter(stat_value > 0) %>%
  ggplot(aes(x = stat_value)) +
  scale_x_log10() +
  # Spell out the default bin count so the plot does not depend on (and
  # message about) an implicit choice.
  geom_histogram(bins = 30) +
  # facet_wrap, not facet_grid: in facet_grid all panels in one column share
  # the x-axis, so scales = "free_x" had no effect with row-only facets.
  facet_wrap(~ stat_name, ncol = 1, scales = "free_x")

Stats over time

library(lubridate)
# Derive calendar features from each post's timestamp:
#   month   - labelled month of `date`
#   weekday - labelled day of the week of `date`
#   hour    - hour of `date`, stored as a factor so it can drive facets/fills
# NOTE(review): the date quartiles in the summary output all fall on 05:00:00,
# so `hour` may carry little information - confirm the timestamp resolution.
posts_long <- mutate(
  posts_long,
  month = month(date, label = TRUE),
  weekday = wday(date, label = TRUE),
  hour = factor(hour(date))
)

By hour of the day

# Like-count histograms on a log10 x-axis, one stacked panel per posting hour
# (panels are also colour-coded by hour).
posts_long %>%
  filter(stat_name == 'likes') %>%
  ggplot(aes(x = stat_value, fill = hour)) +
    scale_x_log10() +
    geom_histogram() +
    facet_grid(hour ~ .) +
    labs(title = 'Engagement by hour of the day', x = 'No. of likes')

By day of the week

# Like-count histograms on a log10 x-axis, one stacked panel per day of the
# week (panels are also colour-coded by weekday).
posts_long %>%
  filter(stat_name == 'likes') %>%
  ggplot(aes(x = stat_value, fill = weekday)) +
    scale_x_log10() +
    geom_histogram() +
    facet_grid(weekday ~ .) +
    labs(title = 'Engagement by day of the week', x = 'No. of likes')

Engagement types

library(scatterPlotMatrix)

# Interactive scatter-plot matrix of followers vs. likes.
# Rows with a missing like count are dropped, then rows are de-duplicated on
# (url, followers, likes); url only serves as the de-duplication key and is
# not passed on to the widget.
posts %>%
  filter(!is.na(likes)) %>%
  distinct(url, followers, likes) %>%
  select(followers, likes) %>%
  scatterPlotMatrix(controlWidgets = TRUE)

Engagement by userID

# Total likes per account: de-duplicate whole rows, ignore posts whose like
# count is missing, then add up the likes for every alias.
by_user <- posts %>%
  distinct() %>%
  filter(!is.na(likes)) %>%
  group_by(alias) %>%
  summarise(total_likes = sum(likes))

# Distribution of those per-user totals on a log10 x-axis.
by_user %>%
  ggplot(aes(x = total_likes)) +
  scale_x_log10() +
  geom_histogram() +
  labs(x = 'total likes per user', y = 'number of users')

Likes vs followers

# Per-user relationship between audience size and engagement: one point per
# alias, x = mean follower count over that user's rows, y = summed likes,
# both axes on a log10 scale.
posts %>%
  distinct() %>%
  # Skip posts with a missing like count so that a single NA does not turn a
  # user's whole total into NA and silently drop the point from the plot
  # (same guard the other like summaries in this file apply).
  filter(!is.na(likes)) %>%
  group_by(alias) %>%
  # summarise() over a single grouping column already returns an ungrouped
  # result, so no ungroup() is needed before plotting.
  summarise(followers = mean(followers), total_likes = sum(likes)) %>%
  ggplot(aes(x = followers, y = total_likes)) +
    geom_point() + scale_x_log10() + scale_y_log10()