library(lme4)
library(tidyverse)
library(sjPlot)
library(stringr)
library(lmerTest) # don't like this but am using it because our pretty tables are not working for the non-linear models
library(lubridate)
# Load the scraped tweets and the LIWC output, then attach the LIWC columns to
# each tweet through the shared tweet_link key.
tweets <- read_csv("~/Dropbox/1_Research/Twitter_Data_Suite/hashtag/miched3/2_tweets.csv")
liwc_results <- read_csv("liwc_results.csv") %>%
  slice(-1) %>% # discard the first data row of the LIWC export
  rename(tweet_link = `Source (A)`)
tweets <- tweets %>%
  left_join(liwc_results, by = "tweet_link")
# Extract the hour of day from the tweet timestamp and bucket it into six
# four-hour chunks. Hours outside 0-23 (or missing) yield NA.
tweets <- tweets %>%
  mutate(
    hour = hour(date),
    hour_chunk = case_when(
      between(hour, 0, 3) ~ "12-3 am",
      between(hour, 4, 7) ~ "4-7 am",
      between(hour, 8, 11) ~ "8-11 am",
      between(hour, 12, 15) ~ "12-3 pm",
      between(hour, 16, 19) ~ "4-7 pm",
      between(hour, 20, 23) ~ "8-11 pm"
    )
  )
rm(liwc_results)
# Load the profile data, count the "*" characters in the `following` field as
# num_following, and align the key name with the tweets table before joining.
profiles <- read_csv("~/Dropbox/1_Research/Twitter_Data_Suite/hashtag/miched3/3_profiles.csv") %>%
  mutate(num_following = stringr::str_count(following, "\\*")) %>%
  rename(screen_name = screenName)
df <- tweets %>%
  left_join(profiles, by = "screen_name")
# The joined data now lives in df; drop the intermediate tables.
rm(profiles, tweets)
# Outcome: total interactions per tweet (favorites + retweets + replies), plus
# the character length of the tweet text as a predictor.
df <- df %>%
  mutate(
    interactions = scraped_num_favorites + scraped_num_retweets + scraped_num_replies,
    nchar = nchar(text)
  )
nrow(df) # all tweets: originals, quotes, replies, and retweets
## [1] 89943
df %>% filter(type == "ORIG") %>% nrow() # original tweets only
## [1] 37291
sum(df$type != "ORIG" | is.na(df$type)) # tweets dropped by the filter below
## [1] 52652
df <- df %>% filter(type == "ORIG") # keep original tweets only
My understanding is that we can use a Poisson distribution, which ranges from 0 to positive infinity and is for “the discrete number of events”. Based on our data, I think there may be more 0’s than a Poisson distribution would lead us to expect, so we could consider a zero-inflated Poisson model (https://en.wikipedia.org/wiki/Zero-inflated_model). Another way to address this, I think, is to run two models: one for whether the outcome is 0 or greater than 0 (a logistic model), and one for the value of the outcome given that it is greater than 0 (a Poisson model).
# Outcomes for the two-step approach: a 0/1 indicator of any interaction, and
# the interaction count with zeros replaced by NA.
df <- df %>%
  mutate(
    interactions_binary = as.numeric(interactions != 0),
    interactions_non_zero = replace(interactions, which(interactions <= 0), NA)
  )
I will add these models to those here, after getting these models finalized using a Poisson distribution.
The distribution of interactions (plotted below) suggests that we should possibly use logged interactions as the outcome. There are very many 0s, which could suggest using a count-data model instead. Sticking with a continuous outcome to start.
# Use the black-and-white ggplot theme, then plot the raw distribution of
# interactions.
set_theme(theme_bw())
df %>%
  ggplot(aes(x = interactions)) +
  geom_histogram(bins = 50)
# Log-transform a vector of non-negative counts without producing -Inf at 0.
#
# Computes log(1 + x). Zero still maps to zero, but unlike special-casing zero
# and taking log(x) otherwise, counts of 0 and 1 no longer collapse onto the
# same value (log(1) is also 0), so the transform is strictly increasing.
#
# x: numeric vector of counts (expected >= 0); NA values are propagated.
# Returns a numeric vector of the same length as x.
safe_log <- function(x) {
  log1p(x)
}
# Add the log-transformed outcome and plot its distribution.
df <- df %>%
  mutate(interactions_log = safe_log(interactions))
df %>%
  ggplot(aes(x = interactions_log)) +
  geom_histogram(bins = 35)
# Mean interactions and number of tweets for each count of URLs in a tweet.
summarize(
  group_by(df, num_urls),
  mean_interactions = mean(interactions),
  n = n()
)
# This plot is a bit misleading, given the number of tweets with three or four URLs
# It does look very approximately linear
# df %>%
# group_by(num_urls) %>%
# summarize(mean_interactions = mean(interactions)) %>%
# ggplot(aes(x = num_urls, y = mean_interactions)) +
# geom_col()
Original model:
# Intercept-only linear mixed model with a random intercept per screen_name,
# used to see how much of the variance in interactions sits between tweeters.
m1 <- lmer(
  interactions ~ 1 + (1 | screen_name),
  data = df
)
sjstats::icc(m1) # ICC = 0.257: about 25.7 % of variability is at the tweeter level
## Linear mixed model
## Family: gaussian (identity)
## Formula: interactions ~ 1 + (1 | screen_name)
##
## ICC (screen_name): 0.257083
Poisson model:
# Intercept-only Poisson mixed model (log link) of the raw interaction count,
# with a random intercept per screen_name.
m1i <- glmer(
  interactions ~ 1 + (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1i)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: interactions ~ 1 + (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 202630.2 202647.2 -101313.1 202626.2 37289
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.440 -1.153 -0.552 0.569 86.340
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.7624 0.8732
## Number of obs: 37291, groups: screen_name, 1766
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.70443 0.02437 28.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjstats::icc(m1i) # ICC = 0.433: about 43.3 % of variability is at the tweeter level under the Poisson (log link) model
## Generalized linear mixed model
## Family: poisson (log)
## Formula: interactions ~ 1 + (1 | screen_name)
##
## ICC (screen_name): 0.432602
Two-step approach step 1:
# Two-step approach, step 1: intercept-only logistic mixed model for whether a
# tweet received any interaction at all (0/1), random intercept per screen_name.
m1ii <- glmer(
  interactions_binary ~ 1 + (1 | screen_name),
  family = binomial,
  data = df
)
summary(m1ii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: interactions_binary ~ 1 + (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 39164.9 39182.0 -19580.5 39160.9 37289
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8038 -0.6228 0.4226 0.5556 3.4684
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 1.31 1.145
## Number of obs: 37291, groups: screen_name, 1766
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.12575 0.04128 27.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjstats::icc(m1ii) # ICC = 0.285: about 28.5 % of variability is at the tweeter level under the binomial (logit link) model
## Generalized linear mixed model
## Family: binomial (logit)
## Formula: interactions_binary ~ 1 + (1 | screen_name)
##
## ICC (screen_name): 0.284775
Two-step approach step 2:
# Two-step approach, step 2: intercept-only Poisson mixed model of the
# interaction count among tweets with at least one interaction. Rows where
# interactions_non_zero is NA (zero-interaction tweets) drop out of the fit;
# the recorded output shows 25845 of 37291 observations used.
# NOTE(review): a plain Poisson still puts probability on 0 even though zeros
# were removed; a zero-truncated count model may be more appropriate -- confirm.
m1iii <- glmer(
  interactions_non_zero ~ 1 + (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1iii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: interactions_non_zero ~ 1 + (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 150032.3 150048.6 -75014.1 150028.3 25843
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.395 -1.124 -0.487 0.547 71.424
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.2618 0.5116
## Number of obs: 25845, groups: screen_name, 1472
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.24629 0.01605 77.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjstats::icc(m1iii) # ICC = 0.207: about 20.7 % of variability is at the tweeter level under the Poisson (log link) model of non-zero interactions
## Generalized linear mixed model
## Family: poisson (log)
## Formula: interactions_non_zero ~ 1 + (1 | screen_name)
##
## ICC (screen_name): 0.207472
Original model:
# Linear mixed model with tweeter-level (profile) predictors, each standardized
# with scale(), and a random intercept per screen_name.
# NOTE(review): the recorded output reports a fixed-effect correlation of
# -1.000 between scale(num_following) and scale(friendsCount), with very large
# standard errors on both -- they look like the same quantity entered twice;
# consider keeping only one.
m1 <- lmer(
  interactions ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    (1 | screen_name),
  data = df
)
summary(m1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: interactions ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## (1 | screen_name)
## Data: df
##
## REML criterion at convergence: 213738.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.958 -0.465 -0.162 0.196 45.873
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 6.086 2.467
## Residual 18.781 4.334
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.08716 0.09598 1402.70000 32.165 < 2e-16
## scale(followersCount) 0.35357 0.05945 1443.70000 5.947 3.42e-09
## scale(num_following) 1.24996 5.18388 1742.20000 0.241 0.809487
## scale(statusesCount) -0.10968 0.07438 2116.50000 -1.475 0.140474
## scale(favoritesCount) 0.33087 0.09580 1787.40000 3.454 0.000566
## scale(friendsCount) -1.24043 5.17987 1742.60000 -0.239 0.810769
##
## (Intercept) ***
## scale(followersCount) ***
## scale(num_following)
## scale(statusesCount)
## scale(favoritesCount) ***
## scale(friendsCount)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(flC) scl(_) scl(sC) scl(fvC)
## scl(fllwrC) -0.030
## scl(nm_fll) 0.419 -0.005
## scl(sttssC) 0.143 -0.126 0.008
## scl(fvrtsC) 0.221 -0.008 -0.013 -0.236
## scl(frndsC) -0.420 -0.001 -1.000 -0.008 0.009
The number of followers and the number of favorites are positively associated with the number of interactions.
Poisson model:
# Poisson mixed model (log link) of the interaction count on the standardized
# profile predictors, with a random intercept per screen_name.
# NOTE(review): the recorded output carries a convergence warning
# (max|grad| = 0.00415 vs tol = 0.001) and a -1.000 correlation between the
# num_following and friendsCount terms; treat those two estimates with caution.
m1i <- glmer(
  interactions ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1i)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: interactions ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 199583.2 199642.7 -99784.6 199569.2 36711
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.441 -1.159 -0.552 0.569 86.342
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.729 0.8538
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.69741 0.02637 26.452 < 2e-16 ***
## scale(followersCount) 0.06601 0.01659 3.979 6.91e-05 ***
## scale(num_following) -0.93557 0.55424 -1.688 0.0914 .
## scale(statusesCount) -0.14077 0.02592 -5.432 5.58e-08 ***
## scale(favoritesCount) 0.10731 0.02553 4.203 2.63e-05 ***
## scale(friendsCount) 0.95007 0.55375 1.716 0.0862 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(flC) scl(_) scl(sC) scl(fvC)
## scl(fllwrC) -0.056
## scl(nm_fll) 0.171 -0.008
## scl(sttssC) 0.242 -0.158 0.012
## scl(fvrtsC) 0.182 -0.013 -0.009 -0.247
## scl(frndsC) -0.173 -0.006 -1.000 -0.012 -0.002
## convergence code: 0
## Model failed to converge with max|grad| = 0.00415378 (tol = 0.001, component 1)
The number of followers and the number of favorites are again positively associated with the number of interactions; in the Poisson model, the statuses count is now negatively associated with the number of interactions.
Two-step approach step 1:
# Two-step approach, step 1: logistic mixed model for any vs. no interaction on
# the standardized profile predictors, with a random intercept per screen_name.
# NOTE(review): the recorded output carries a convergence warning
# (max|grad| = 0.00124 vs tol = 0.001) and a -1.000 correlation between the
# num_following and friendsCount terms, whose estimates are large and opposite
# in sign; interpret those two with caution.
m1ii <- glmer(
  interactions_binary ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    (1 | screen_name),
  family = binomial,
  data = df
)
summary(m1ii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## interactions_binary ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 38544.2 38603.8 -19265.1 38530.2 36711
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6602 -0.6231 0.4186 0.5561 6.0917
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 1.188 1.09
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.09184 0.04624 23.614 < 2e-16 ***
## scale(followersCount) 0.05386 0.03192 1.687 0.091589 .
## scale(num_following) -5.75486 1.70489 -3.376 0.000737 ***
## scale(statusesCount) -0.26182 0.04255 -6.153 7.58e-10 ***
## scale(favoritesCount) 0.25067 0.05433 4.613 3.96e-06 ***
## scale(friendsCount) 5.81506 1.70299 3.415 0.000639 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(flC) scl(_) scl(sC) scl(fvC)
## scl(fllwrC) -0.027
## scl(nm_fll) 0.272 -0.009
## scl(sttssC) 0.147 -0.145 0.001
## scl(fvrtsC) 0.285 0.015 -0.027 -0.271
## scl(frndsC) -0.272 -0.001 -1.000 -0.003 0.020
## convergence code: 0
## Model failed to converge with max|grad| = 0.00124321 (tol = 0.001, component 1)
Two-step approach step 2:
# Two-step approach, step 2: Poisson mixed model of the non-zero interaction
# count on the standardized profile predictors, random intercept per
# screen_name. Zero-interaction tweets are NA in the outcome and drop out.
# NOTE(review): the recorded output carries a convergence warning
# (max|grad| = 0.00976 vs tol = 0.001) and a -1.000 correlation between the
# num_following and friendsCount terms.
m1iii <- glmer(
  interactions_non_zero ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1iii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## interactions_non_zero ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 147998.2 148055.2 -73992.1 147984.2 25482
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.398 -1.123 -0.490 0.546 71.422
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.2484 0.4984
## Number of obs: 25489, groups: screen_name, 1430
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.25965 0.01928 65.34 < 2e-16 ***
## scale(followersCount) 0.06017 0.01077 5.59 2.28e-08 ***
## scale(num_following) 0.16250 1.02439 0.16 0.8740
## scale(statusesCount) -0.01038 0.01825 -0.57 0.5694
## scale(favoritesCount) 0.03809 0.01694 2.25 0.0246 *
## scale(friendsCount) -0.16365 1.02371 -0.16 0.8730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(flC) scl(_) scl(sC) scl(fvC)
## scl(fllwrC) -0.076
## scl(nm_fll) 0.438 -0.016
## scl(sttssC) 0.289 -0.184 0.046
## scl(fvrtsC) 0.180 -0.020 0.006 -0.162
## scl(frndsC) -0.439 0.011 -1.000 -0.047 -0.010
## convergence code: 0
## Model failed to converge with max|grad| = 0.00976129 (tol = 0.001, component 1)
Original model:
# Linear mixed model adding tweet-level predictors (day of week, time-of-day
# chunk, number of URLs, number of hashtags, tweet length) to the standardized
# profile predictors; random intercept per screen_name.
# Note that the tweet-level numeric predictors enter unscaled.
m1 <- lmer(
  interactions ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    (1 | screen_name),
  data = df
)
summary(m1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: interactions ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + (1 |
## screen_name)
## Data: df
##
## REML criterion at convergence: 212860.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.003 -0.450 -0.161 0.198 46.087
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 6.092 2.468
## Residual 18.305 4.278
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.392e+00 1.759e-01 1.124e+04 7.916 2.66e-15
## scale(followersCount) 3.610e-01 5.919e-02 1.434e+03 6.099 1.37e-09
## scale(num_following) 5.380e-01 5.158e+00 1.727e+03 0.104 0.916948
## scale(statusesCount) -8.009e-02 7.399e-02 2.128e+03 -1.082 0.279168
## scale(favoritesCount) 2.966e-01 9.536e-02 1.785e+03 3.111 0.001897
## scale(friendsCount) -5.258e-01 5.154e+00 1.727e+03 -0.102 0.918760
## dayMonday 9.488e-03 9.906e-02 3.610e+04 0.096 0.923695
## daySaturday 4.135e-01 1.200e-01 3.627e+04 3.446 0.000569
## daySunday 8.553e-01 1.232e-01 3.647e+04 6.941 3.97e-12
## dayThursday -4.351e-02 9.412e-02 3.612e+04 -0.462 0.643910
## dayTuesday -6.625e-02 9.326e-02 3.621e+04 -0.710 0.477490
## dayWednesday -2.001e-01 8.858e-02 3.636e+04 -2.259 0.023874
## hour_chunk12-3 pm -3.864e-01 7.668e-02 3.668e+04 -5.039 4.71e-07
## hour_chunk4-7 am -2.002e-01 2.267e-01 3.670e+04 -0.883 0.377334
## hour_chunk4-7 pm -1.481e-01 8.135e-02 3.667e+04 -1.821 0.068679
## hour_chunk8-11 am 3.194e-01 1.236e-01 3.647e+04 2.584 0.009782
## hour_chunk8-11 pm -2.414e-02 8.696e-02 3.669e+04 -0.278 0.781348
## num_urls -1.332e+00 5.756e-02 3.634e+04 -23.131 < 2e-16
## num_hashtags 1.120e-01 2.362e-02 3.182e+04 4.741 2.14e-06
## nchar 1.857e-02 1.013e-03 3.670e+04 18.328 < 2e-16
##
## (Intercept) ***
## scale(followersCount) ***
## scale(num_following)
## scale(statusesCount)
## scale(favoritesCount) **
## scale(friendsCount)
## dayMonday
## daySaturday ***
## daySunday ***
## dayThursday
## dayTuesday
## dayWednesday *
## hour_chunk12-3 pm ***
## hour_chunk4-7 am
## hour_chunk4-7 pm .
## hour_chunk8-11 am **
## hour_chunk8-11 pm
## num_urls ***
## num_hashtags ***
## nchar ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Poisson model:
# Poisson mixed model (log link) of the interaction count on profile and
# tweet-level predictors; random intercept per screen_name.
# NOTE(review): the recorded output shows this fit did not converge
# (max|grad| = 0.706, "failure to converge in 10000 evaluations") and is
# flagged as nearly unidentifiable with a "Rescale variables?" hint. The
# unscaled num_urls / num_hashtags / nchar terms and the num_following /
# friendsCount pair are the likely candidates -- confirm before interpreting.
m1i <- glmer(
  interactions ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1i)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: interactions ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + (1 |
## screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 193931.1 194109.8 -96944.5 193889.1 36697
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.988 -1.143 -0.524 0.563 74.505
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.6835 0.8267
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.537e-02 3.450e-02 2.47 0.013336 *
## scale(followersCount) 7.058e-02 1.610e-02 4.38 1.17e-05 ***
## scale(num_following) -1.011e+00 1.112e+00 -0.91 0.363525
## scale(statusesCount) -1.272e-01 2.490e-02 -5.11 3.23e-07 ***
## scale(favoritesCount) 9.573e-02 2.485e-02 3.85 0.000117 ***
## scale(friendsCount) 1.024e+00 1.111e+00 0.92 0.356867
## dayMonday 1.740e-02 1.414e-02 1.23 0.218562
## daySaturday 1.512e-01 1.639e-02 9.22 < 2e-16 ***
## daySunday 2.855e-01 1.623e-02 17.59 < 2e-16 ***
## dayThursday 9.216e-06 1.338e-02 0.00 0.999450
## dayTuesday -9.334e-03 1.334e-02 -0.70 0.484189
## dayWednesday -5.796e-02 1.262e-02 -4.59 4.37e-06 ***
## hour_chunk12-3 pm -1.425e-01 1.051e-02 -13.56 < 2e-16 ***
## hour_chunk4-7 am -3.752e-02 3.803e-02 -0.99 0.323865
## hour_chunk4-7 pm -5.723e-02 1.116e-02 -5.13 2.89e-07 ***
## hour_chunk8-11 am 1.192e-01 1.654e-02 7.21 5.71e-13 ***
## hour_chunk8-11 pm -1.273e-02 1.169e-02 -1.09 0.276107
## num_urls -4.488e-01 8.107e-03 -55.35 < 2e-16 ***
## num_hashtags 4.196e-02 3.245e-03 12.93 < 2e-16 ***
## nchar 6.386e-03 1.462e-04 43.67 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## Model failed to converge with max|grad| = 0.705674 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## failure to converge in 10000 evaluations
Two-step approach step 1:
# Two-step approach, step 1: logistic mixed model for any vs. no interaction on
# profile and tweet-level predictors; random intercept per screen_name.
# NOTE(review): the recorded output shows a convergence warning
# (max|grad| = 0.116 vs tol = 0.001) and "nearly unidentifiable" messages
# suggesting rescaling -- confirm before interpreting coefficients.
m1ii <- glmer(
  interactions_binary ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    (1 | screen_name),
  family = binomial,
  data = df
)
summary(m1ii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## interactions_binary ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + (1 |
## screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 37863.2 38042.0 -18910.6 37821.2 36697
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6981 -0.6533 0.3930 0.5515 6.1260
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.9677 0.9837
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5104220 0.0945828 5.397 6.79e-08 ***
## scale(followersCount) 0.0587369 0.0304307 1.930 0.05358 .
## scale(num_following) -6.3017041 2.7381691 -2.301 0.02137 *
## scale(statusesCount) -0.2326032 0.0390491 -5.957 2.57e-09 ***
## scale(favoritesCount) 0.2136833 0.0515387 4.146 3.38e-05 ***
## scale(friendsCount) 6.3814320 2.7354194 2.333 0.01965 *
## dayMonday -0.0764005 0.0557325 -1.371 0.17042
## daySaturday 0.1127096 0.0686920 1.641 0.10084
## daySunday 0.0864640 0.0701599 1.232 0.21781
## dayThursday -0.1571252 0.0529622 -2.967 0.00301 **
## dayTuesday -0.1140148 0.0521780 -2.185 0.02888 *
## dayWednesday 0.0291299 0.0502822 0.579 0.56237
## hour_chunk12-3 pm -0.4134710 0.0438192 -9.436 < 2e-16 ***
## hour_chunk4-7 am -0.3018583 0.1211914 -2.491 0.01275 *
## hour_chunk4-7 pm -0.3331724 0.0460432 -7.236 4.62e-13 ***
## hour_chunk8-11 am -0.1810371 0.0717141 -2.524 0.01159 *
## hour_chunk8-11 pm -0.2438335 0.0496538 -4.911 9.08e-07 ***
## num_urls -0.6067395 0.0319670 -18.980 < 2e-16 ***
## num_hashtags 0.0397182 0.0132582 2.996 0.00274 **
## nchar 0.0083774 0.0005784 14.484 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## Model failed to converge with max|grad| = 0.115523 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Two-step approach step 2:
# Two-step approach, step 2: Poisson mixed model of the non-zero interaction
# count on profile and tweet-level predictors; random intercept per
# screen_name. Zero-interaction tweets are NA in the outcome and drop out.
# NOTE(review): the recorded output shows a convergence warning
# (max|grad| = 0.809 vs tol = 0.001) and "nearly unidentifiable" messages
# suggesting rescaling -- confirm before interpreting coefficients.
m1iii <- glmer(
  interactions_non_zero ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1iii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## interactions_non_zero ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + (1 |
## screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 144878.6 145049.7 -72418.3 144836.6 25468
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.982 -1.084 -0.473 0.526 63.868
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.2447 0.4946
## Number of obs: 25489, groups: screen_name, 1430
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7707576 0.0286254 26.93 < 2e-16 ***
## scale(followersCount) 0.0618286 0.0106560 5.80 6.54e-09 ***
## scale(num_following) 0.0718411 0.9080461 0.08 0.9369
## scale(statusesCount) 0.0012713 0.0180686 0.07 0.9439
## scale(favoritesCount) 0.0333510 0.0168689 1.98 0.0480 *
## scale(friendsCount) -0.0755511 0.9074644 -0.08 0.9336
## dayMonday 0.0260419 0.0141420 1.84 0.0656 .
## daySaturday 0.1325150 0.0163966 8.08 6.38e-16 ***
## daySunday 0.2644783 0.0161986 16.33 < 2e-16 ***
## dayThursday 0.0269294 0.0133728 2.01 0.0440 *
## dayTuesday 0.0098193 0.0133242 0.74 0.4612
## dayWednesday -0.0559653 0.0126043 -4.44 8.99e-06 ***
## hour_chunk12-3 pm -0.0820064 0.0104654 -7.84 4.65e-15 ***
## hour_chunk4-7 am -0.0264195 0.0382197 -0.69 0.4894
## hour_chunk4-7 pm -0.0097834 0.0110575 -0.88 0.3763
## hour_chunk8-11 am 0.1370173 0.0165395 8.28 < 2e-16 ***
## hour_chunk8-11 pm 0.0180216 0.0116295 1.55 0.1212
## num_urls -0.3170616 0.0080417 -39.43 < 2e-16 ***
## num_hashtags 0.0283210 0.0032096 8.82 < 2e-16 ***
## nchar 0.0047443 0.0001468 32.33 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## Model failed to converge with max|grad| = 0.809073 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
More information on the LIWC here: https://s3-us-west-2.amazonaws.com/downloads.liwc.net/LIWC2015_OperatorManual.pdf
Four categories we can use (Analytic, Clout, Authentic, and Tone) - these are summary variables, a little different from the other LIWC variables:
Original model:
# Linear mixed model adding the four LIWC summary variables (Analytic, Clout,
# Authentic, Tone) to the profile and tweet-level predictors; random intercept
# per screen_name. The LIWC variables enter unscaled.
m1 <- lmer(
  interactions ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    Analytic +
    Clout +
    Authentic +
    Tone +
    (1 | screen_name),
  data = df
)
summary(m1)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: interactions ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + Analytic +
## Clout + Authentic + Tone + (1 | screen_name)
## Data: df
##
## REML criterion at convergence: 212851.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.080 -0.449 -0.161 0.198 46.120
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 6.083 2.466
## Residual 18.278 4.275
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.879e+00 2.092e-01 1.748e+04 8.986 < 2e-16
## scale(followersCount) 3.640e-01 5.915e-02 1.434e+03 6.153 9.85e-10
## scale(num_following) 4.660e-01 5.155e+00 1.727e+03 0.090 0.927972
## scale(statusesCount) -8.303e-02 7.396e-02 2.130e+03 -1.123 0.261675
## scale(favoritesCount) 2.985e-01 9.529e-02 1.785e+03 3.133 0.001759
## scale(friendsCount) -4.553e-01 5.151e+00 1.728e+03 -0.088 0.929579
## dayMonday 1.430e-02 9.899e-02 3.610e+04 0.144 0.885121
## daySaturday 4.162e-01 1.199e-01 3.627e+04 3.471 0.000519
## daySunday 8.558e-01 1.232e-01 3.646e+04 6.949 3.75e-12
## dayThursday -4.342e-02 9.406e-02 3.612e+04 -0.462 0.644361
## dayTuesday -6.575e-02 9.320e-02 3.621e+04 -0.706 0.480496
## dayWednesday -1.849e-01 8.859e-02 3.635e+04 -2.087 0.036862
## hour_chunk12-3 pm -4.106e-01 7.685e-02 3.668e+04 -5.343 9.18e-08
## hour_chunk4-7 am -2.232e-01 2.266e-01 3.669e+04 -0.985 0.324736
## hour_chunk4-7 pm -1.769e-01 8.156e-02 3.667e+04 -2.169 0.030058
## hour_chunk8-11 am 3.137e-01 1.236e-01 3.646e+04 2.537 0.011172
## hour_chunk8-11 pm -4.004e-02 8.704e-02 3.668e+04 -0.460 0.645534
## num_urls -1.417e+00 5.921e-02 3.645e+04 -23.938 < 2e-16
## num_hashtags 9.712e-02 2.377e-02 3.198e+04 4.086 4.39e-05
## nchar 1.935e-02 1.021e-03 3.669e+04 18.946 < 2e-16
## Analytic 1.272e-04 1.076e-03 3.635e+04 0.118 0.905924
## Clout -4.964e-03 9.982e-04 3.639e+04 -4.973 6.62e-07
## Authentic -5.483e-03 8.390e-04 3.641e+04 -6.535 6.45e-11
## Tone -1.156e-03 6.412e-04 3.635e+04 -1.803 0.071386
##
## (Intercept) ***
## scale(followersCount) ***
## scale(num_following)
## scale(statusesCount)
## scale(favoritesCount) **
## scale(friendsCount)
## dayMonday
## daySaturday ***
## daySunday ***
## dayThursday
## dayTuesday
## dayWednesday *
## hour_chunk12-3 pm ***
## hour_chunk4-7 am
## hour_chunk4-7 pm *
## hour_chunk8-11 am *
## hour_chunk8-11 pm
## num_urls ***
## num_hashtags ***
## nchar ***
## Analytic
## Clout ***
## Authentic ***
## Tone .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Poisson model:
# Poisson mixed model (log link) of the interaction count on profile,
# tweet-level, and LIWC summary predictors; random intercept per screen_name.
# NOTE(review): the recorded output shows a convergence warning with a large
# gradient (max|grad| = 7.21 vs tol = 0.001) and "nearly unidentifiable"
# messages suggesting rescaling -- confirm before interpreting coefficients.
m1i <- glmer(
  interactions ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    Analytic +
    Clout +
    Authentic +
    Tone +
    (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1i)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: interactions ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + Analytic +
## Clout + Authentic + Tone + (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 193613.1 193825.9 -96781.6 193563.1 36693
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.337 -1.145 -0.525 0.554 75.115
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.6857 0.8281
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.476e-01 3.807e-02 6.50 7.80e-11 ***
## scale(followersCount) 7.144e-02 1.612e-02 4.43 9.40e-06 ***
## scale(num_following) -9.969e-01 1.248e+00 -0.80 0.424478
## scale(statusesCount) -1.277e-01 2.493e-02 -5.12 3.02e-07 ***
## scale(favoritesCount) 9.656e-02 2.489e-02 3.88 0.000105 ***
## scale(friendsCount) 1.010e+00 1.247e+00 0.81 0.417983
## dayMonday 1.926e-02 1.414e-02 1.36 0.173247
## daySaturday 1.515e-01 1.640e-02 9.24 < 2e-16 ***
## daySunday 2.852e-01 1.624e-02 17.56 < 2e-16 ***
## dayThursday -2.857e-03 1.338e-02 -0.21 0.830960
## dayTuesday -1.066e-02 1.334e-02 -0.80 0.424245
## dayWednesday -5.360e-02 1.262e-02 -4.25 2.17e-05 ***
## hour_chunk12-3 pm -1.490e-01 1.053e-02 -14.15 < 2e-16 ***
## hour_chunk4-7 am -4.011e-02 3.806e-02 -1.05 0.291964
## hour_chunk4-7 pm -6.557e-02 1.118e-02 -5.86 4.55e-09 ***
## hour_chunk8-11 am 1.257e-01 1.655e-02 7.59 3.10e-14 ***
## hour_chunk8-11 pm -1.674e-02 1.171e-02 -1.43 0.152971
## num_urls -4.749e-01 8.310e-03 -57.14 < 2e-16 ***
## num_hashtags 3.684e-02 3.268e-03 11.27 < 2e-16 ***
## nchar 6.607e-03 1.474e-04 44.83 < 2e-16 ***
## Analytic -3.487e-05 1.447e-04 -0.24 0.809589
## Clout -1.611e-03 1.345e-04 -11.97 < 2e-16 ***
## Authentic -1.808e-03 1.156e-04 -15.64 < 2e-16 ***
## Tone -2.772e-04 8.819e-05 -3.14 0.001670 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## Model failed to converge with max|grad| = 7.21139 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Two-step approach step 1:
# Two-step approach, step 1: logistic mixed model for any vs. no interaction on
# profile, tweet-level, and LIWC summary predictors; random intercept per
# screen_name.
# NOTE(review): the recorded output shows a convergence warning
# (max|grad| = 0.481 vs tol = 0.001) and "nearly unidentifiable" messages
# suggesting rescaling -- confirm before interpreting coefficients.
m1ii <- glmer(
  interactions_binary ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    Analytic +
    Clout +
    Authentic +
    Tone +
    (1 | screen_name),
  family = binomial,
  data = df
)
summary(m1ii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## interactions_binary ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + Analytic +
## Clout + Authentic + Tone + (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 37851.1 38063.9 -18900.5 37801.1 36693
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6190 -0.6508 0.3923 0.5520 6.0189
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.9748 0.9873
## Number of obs: 36718, groups: screen_name, 1716
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.965e-01 1.178e-01 5.064 4.12e-07 ***
## scale(followersCount) 5.879e-02 3.048e-02 1.929 0.05373 .
## scale(num_following) -6.374e+00 4.123e+00 -1.546 0.12210
## scale(statusesCount) -2.306e-01 3.910e-02 -5.898 3.69e-09 ***
## scale(favoritesCount) 2.133e-01 5.166e-02 4.129 3.64e-05 ***
## scale(friendsCount) 6.455e+00 4.119e+00 1.567 0.11711
## dayMonday -7.147e-02 5.575e-02 -1.282 0.19983
## daySaturday 1.200e-01 6.873e-02 1.746 0.08076 .
## daySunday 8.978e-02 7.018e-02 1.279 0.20081
## dayThursday -1.527e-01 5.298e-02 -2.882 0.00395 **
## dayTuesday -1.102e-01 5.219e-02 -2.111 0.03473 *
## dayWednesday 3.913e-02 5.035e-02 0.777 0.43707
## hour_chunk12-3 pm -4.183e-01 4.397e-02 -9.513 < 2e-16 ***
## hour_chunk4-7 am -3.060e-01 1.212e-01 -2.524 0.01161 *
## hour_chunk4-7 pm -3.394e-01 4.622e-02 -7.343 2.08e-13 ***
## hour_chunk8-11 am -1.829e-01 7.173e-02 -2.549 0.01079 *
## hour_chunk8-11 pm -2.463e-01 4.974e-02 -4.952 7.34e-07 ***
## num_urls -6.219e-01 3.309e-02 -18.792 < 2e-16 ***
## num_hashtags 3.861e-02 1.337e-02 2.888 0.00387 **
## nchar 8.459e-03 5.835e-04 14.498 < 2e-16 ***
## Analytic 8.623e-05 6.311e-04 0.137 0.89133
## Clout -1.684e-03 5.813e-04 -2.897 0.00377 **
## Authentic -1.567e-03 4.863e-04 -3.221 0.00128 **
## Tone 8.468e-04 3.696e-04 2.291 0.02196 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## Model failed to converge with max|grad| = 0.480979 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Two-step approach step 2:
# Two-step approach, step 2: Poisson mixed model of the non-zero interaction
# count on profile, tweet-level, and LIWC summary predictors; random intercept
# per screen_name. Zero-interaction tweets are NA in the outcome and drop out.
# NOTE(review): the recorded output shows a convergence warning
# (max|grad| = 0.988 vs tol = 0.001) and "nearly unidentifiable" messages
# suggesting rescaling -- confirm before interpreting coefficients.
m1iii <- glmer(
  interactions_non_zero ~
    scale(followersCount) +
    scale(num_following) +
    scale(statusesCount) +
    scale(favoritesCount) +
    scale(friendsCount) +
    day +
    hour_chunk +
    num_urls +
    num_hashtags +
    nchar +
    Analytic +
    Clout +
    Authentic +
    Tone +
    (1 | screen_name),
  family = poisson,
  data = df
)
summary(m1iii)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## interactions_non_zero ~ scale(followersCount) + scale(num_following) +
## scale(statusesCount) + scale(favoritesCount) + scale(friendsCount) +
## day + hour_chunk + num_urls + num_hashtags + nchar + Analytic +
## Clout + Authentic + Tone + (1 | screen_name)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 144659.5 144863.1 -72304.7 144609.5 25464
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.262 -1.080 -0.474 0.532 64.040
##
## Random effects:
## Groups Name Variance Std.Dev.
## screen_name (Intercept) 0.2422 0.4921
## Number of obs: 25489, groups: screen_name, 1430
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.179e-01 3.231e-02 28.41 < 2e-16 ***
## scale(followersCount) 6.321e-02 1.061e-02 5.96 2.57e-09 ***
## scale(num_following) 6.428e-02 8.979e-01 0.07 0.9429
## scale(statusesCount) -1.236e-04 1.804e-02 -0.01 0.9945
## scale(favoritesCount) 3.398e-02 1.681e-02 2.02 0.0433 *
## scale(friendsCount) -6.901e-02 8.974e-01 -0.08 0.9387
## dayMonday 2.826e-02 1.414e-02 2.00 0.0457 *
## daySaturday 1.326e-01 1.640e-02 8.09 6.11e-16 ***
## daySunday 2.638e-01 1.620e-02 16.29 < 2e-16 ***
## dayThursday 2.505e-02 1.338e-02 1.87 0.0612 .
## dayTuesday 9.567e-03 1.332e-02 0.72 0.4727
## dayWednesday -5.298e-02 1.261e-02 -4.20 2.66e-05 ***
## hour_chunk12-3 pm -8.703e-02 1.049e-02 -8.30 < 2e-16 ***
## hour_chunk4-7 am -2.664e-02 3.823e-02 -0.70 0.4859
## hour_chunk4-7 pm -1.744e-02 1.109e-02 -1.57 0.1158
## hour_chunk8-11 am 1.422e-01 1.655e-02 8.59 < 2e-16 ***
## hour_chunk8-11 pm 1.459e-02 1.165e-02 1.25 0.2103
## num_urls -3.398e-01 8.250e-03 -41.18 < 2e-16 ***
## num_hashtags 2.410e-02 3.233e-03 7.46 8.94e-14 ***
## nchar 4.968e-03 1.479e-04 33.60 < 2e-16 ***
## Analytic -1.874e-04 1.442e-04 -1.30 0.1937
## Clout -1.237e-03 1.340e-04 -9.23 < 2e-16 ***
## Authentic -1.491e-03 1.156e-04 -12.90 < 2e-16 ***
## Tone -3.911e-04 8.813e-05 -4.44 9.10e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## convergence code: 0
## Model failed to converge with max|grad| = 0.987856 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?