library(rtweet)
library(dplyr)
library(ggplot2)
library(lubridate)
library(SentimentAnalysis)
library(dlookr)
library(hrbrthemes)
library(viridis)
library(stringr)
library(tidytext)
library(tm)
library(tokenizers)
library(wordcloud)
library(tidyr)
library(glue)
library(igraph)
library(syuzhet)
1. Cleaning and manipulating data
The purpose of this process will be to remove all unnecessary
variables not required for research as well as create additional
variables of sentiment and reception, then split the data frame based on
the source of each tweet.
# Bring the cleaned source labels from `final_source` in as the first column
# (temporarily called "source_clean"), drop the original `source` column, and
# then let the cleaned labels take over the name "source".
final_data <- cbind(source_clean = final_source$source, final_data)
final_data <- subset(final_data, select = -source)
names(final_data)[1] <- "source"
# Generate reception scores.
# A large `scipen` makes R print the small ratios below in fixed notation
# instead of scientific notation.
options(scipen = 999)

# Retweet ratio: retweets (counted twice) as a percentage of followers.
final_data$retweet_ratio <-
  (final_data$retweet_count * 2 / final_data$followers_count) * 100

# Favorite ratio: favorites as a percentage of followers.
final_data$favorite_ratio <-
  (final_data$favorite_count / final_data$followers_count) * 100

# Reception score for each tweet: the row-wise mean of the two ratios.
# The columns are selected by name rather than by position (14:15) so the
# score does not silently change if the column layout of `final_data` does.
# NOTE(review): a followers_count of 0 would give Inf/NaN ratios - confirm
# whether such accounts occur in the data.
final_data$reception <- rowMeans(
  final_data[c("retweet_ratio", "favorite_ratio")],
  na.rm = TRUE
)
# Transform the text variable into a usable form: lower-case every tweet,
# then strip English stop words and punctuation.
text_corpus <- Corpus(VectorSource(final_data$text))
# Base functions such as `tolower` are wrapped in content_transformer(), the
# same way the corpus cleaning in the text-analysis section does it.
text_corpus <- tm_map(text_corpus, content_transformer(tolower))
text_corpus <- tm_map(text_corpus, removeWords, stopwords("english"))
text_corpus <- tm_map(text_corpus, removePunctuation)
# When this report was knitted, each tm_map() call above emitted:
## Warning in tm_map.SimpleCorpus(...): transformation drops documents

# Pull the cleaned text back out through the corpus accessor and attach it to
# the data set as `text_clean`.
text_df <- data.frame(
  text_clean = content(text_corpus),
  stringsAsFactors = FALSE
)
final_data <- cbind.data.frame(final_data, text_df)

# Remove the raw text variables "text" and "full_text"; `text_clean` replaces
# them from here on.
final_data <- subset(final_data, select = -c(text, full_text))
# Score every cleaned tweet with analyzeSentiment().
sentiment <- analyzeSentiment(final_data$text_clean)

# Append the SentimentGI score; it lands in column 16 and is renamed
# "sentiment".
final_data <- cbind(final_data, sentiment$SentimentGI)
names(final_data)[16] <- "sentiment"

# Reorder the variables by position: columns 2-4 first, then 15, then 1,
# then 5-14, with the new sentiment column (16) last.
final_data <- final_data[, c(2:4, 15, 1, 5:14, 16)]
# Split the data set into separate frames for later analysis.

# Tweets whose source is "User".
only_users <- final_data[final_data[["source"]] == "User", ]

# Tweets posted by the "LIVGolfInv" account.
LIVGolf <- final_data[final_data[["screen_name"]] == "LIVGolfInv", ]

# Tweets whose source is "Media".
media <- final_data[final_data[["source"]] == "Media", ]
2. Exploratory Analysis
The purpose of this process is to understand basic properties of the
dataset and begin to understand what information the dataset
contains.
# Number of tweets from each source, most frequent first.
final_source %>%
  group_by(source) %>%
  count(source, sort = TRUE)

# Favorite counts per account, largest first.
favorites_w_user <- final_data %>%
  select(screen_name, favorite_count)
favorites_w_user %>%
  arrange(desc(favorite_count))

# Retweet counts per account, largest first.
retweets_w_user <- final_data %>%
  select(screen_name, retweet_count)
retweets_w_user %>%
  arrange(desc(retweet_count))
Conclusions
Source Variable - LIVGolf has several hundred thousand followers, but
those followers only create a very small amount of conversation about
the series. While users as a whole create a larger total quantity of
tweets, throughout the observation period no other user created as many
tweets about LIVGolf as the account “LIVGolfInv”.
Retweet Variable - As expected, the users with the highest number of
followers garnered the highest number of retweets per post. The number
of retweets that LIVGolf content generates among their followers is
unexpected.
Favorite_count Variable - As expected, the users with the highest
amount of followers garnered the highest number of favorites per tweet.
It is unexpected that LIVGolf content creates such a relatively small
amount of likes in relation to their follower count.
Additional analysis is required to understand why users with
significantly smaller numbers of followers received a comparable number
of favorites, specifically the users “20thCenturyDan” and
“AndrewKirbyGolf”. What is it about those two tweets that generated
engagement from their followers, and is it indicative of the reception of
LIVGolf?
3. Frequency Analysis
The purpose of the frequency analysis is to determine the amount of
conversation over time about LIV golf and to determine how much of the
conversation is attributable to LIVGolf creating the conversation, media
reporting on LIVGolf, and user generated content about the LIVGolf
topic.
# Count tweets per day and source, then convert the grouped summary into a
# plain data frame for plotting.
source_by_type <- as.data.frame(
  summarise(
    group_by(final_data, day_created, source),
    total_count = n(),
    .groups = "keep"
  )
)
source_by_type
Frequency Plot Activity
# Stacked bar chart of the daily tweet count, coloured by source.
histogram_all_plot <- ggplot(
  source_by_type,
  aes(x = day_created, y = total_count)
) +
  geom_bar(
    aes(color = source, fill = source),
    stat = "identity",
    position = position_stack()
  ) +
  labs(
    title = "Source of LIVGolf Content",
    subtitle = "November 8th to November 28th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Daily Count",
    x = "Date Created"
  )
# Draw one axis break for every day value from 8 to 29.
histogram_all_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

Cumulative Relative Frequency Plot
# Bar chart of each source's share of the daily tweets; position = "fill"
# scales every day's bar to the same height.
rel_freq_plot <- ggplot(
  source_by_type,
  aes(x = day_created, y = total_count)
) +
  geom_bar(
    aes(color = source, fill = source),
    stat = "identity",
    position = "fill"
  ) +
  labs(
    title = "Relative Frequency of Source",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Percent of Tweets",
    x = "Date Created"
  )
# Draw one axis break for every day value from 8 to 29.
rel_freq_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

Conclusions
Media Contributors- Media reporting about LIVGolf saw a peak from the
13th to the 17th of November. Additional analysis is required to
determine the topic of conversation for media tweets.
All Contributors - The highest level of activity seen on the Twitter
platform from LIVGolf was on the 17th and 14th of November.
User Contribution - The highest level of activity seen on the
platform from users was on the 18th of November. Additional analysis
could be conducted on the content from the 18th of November to
better understand what motivated users to post about LIVGolf.
LIVGolf Contribution - The highest total number of tweets from LIVGolf
occurred on the 17th of November.
Additional analysis is required to understand the conversation about
LIVGolf during the peak conversational dates. It is possible to explore
the connection between number of tweets from LIVGolf and number of user
and media tweets.
4. Reception Analysis
Reception of the tweet is measured as a calculation of engagements
while taking into account that the amounts of followers vary between
users and vary over time. For the purposes of this research, reception
can equate to popularity of opinion. Therefore, measuring reception is
crucial to understanding dominant viewpoints about LIVGolf among
users.
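Reception is the mean of two ratios, each expressed as a percentage of
the author's followers: (retweets * 2 / followers) * 100 and
(favorites / followers) * 100, i.e.
$\text{reception} = \frac{1}{2}\left(\frac{2 \cdot \text{retweets}}{\text{followers}} + \frac{\text{favorites}}{\text{followers}}\right) \times 100$.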
This calculation accounts for the reach achieved by the number of
followers as well as accounting for what occurs when a follower retweets
content. The number of users that could potentially interact with
content becomes larger as a user’s followers retweet content, thus,
there is potential for content to be seen by a larger number of users
than only followers.
The purpose of this section will be to understand the level of
reception to LIVGolf generated content during the observation period.
Then analysis will be conducted to determine if LIV Golf content is
either growing in popularity, decreasing in popularity, or remaining
relatively stable. A point of comparison is the mean reception score of
content generated by users. If significantly different, future analysis
could be conducted to understand the message behind content with higher
or lower reception scored content.
Reception Scatter Plot
# Scatter plot of the reception score of every LIVGolf-authored tweet by day.
LIV_Golf_reception_plot <- ggplot(
  LIVGolf,
  aes(x = day_created, y = reception)
) +
  geom_point(size = 1, color = "red") +
  labs(
    title = "Reception Score of LIVGolf Generated Tweets",
    subtitle = "November 8th to November 28th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Reception Score",
    x = "Date of Tweet"
  )
# Draw one axis break for every day value from 8 to 29.
LIV_Golf_reception_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

Mean Reception Score - LIV Golf
# LIVGolf sometimes posted several tweets on one day, so average the
# reception score per day of observation.
LIV_daily_reception <- LIVGolf %>%
  group_by(day_created) %>%
  summarise(reception = mean(reception))

# Lollipop chart of the mean daily reception of LIVGolf-authored tweets:
# a point at each daily mean plus a segment dropping to zero.
daily_reception_plot <- ggplot(
  LIV_daily_reception,
  aes(x = day_created, y = reception)
) +
  geom_point(size = 1, color = "red") +
  geom_segment(
    aes(x = day_created, xend = day_created, y = 0, yend = reception)
  ) +
  labs(
    title = "Mean Reception Score of LIVGolf Generated Tweets",
    subtitle = "November 8th to November 27th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Reception Score",
    x = "Date of Tweet"
  )
# Draw one axis break for every day value from 8 to 29.
daily_reception_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

Mean Reception Score - Users
# Mean reception score of user-generated tweets for each day.
mean_user_daily_reception <- only_users %>%
  group_by(day_created) %>%
  summarise(reception = mean(reception))

# Lollipop chart of the mean daily reception of user-generated tweets:
# a point at each daily mean plus a segment dropping to zero.
mean_user_daily_reception_plot <- ggplot(
  mean_user_daily_reception,
  aes(x = day_created, y = reception)
) +
  geom_point(size = 1, color = "blue") +
  geom_segment(
    aes(x = day_created, xend = day_created, y = 0, yend = reception)
  ) +
  labs(
    title = "Mean Reception Score of User Tweets",
    subtitle = "November 8th to November 28th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Reception Score",
    x = "Date of Tweet"
  )
# Draw one axis break for every day value from 8 to 29.
mean_user_daily_reception_plot +
  scale_x_continuous(breaks = seq(8, 29, by = 1))

# Day and reception score of every user tweet, ordered by day, for the
# per-day boxplot.
only_users_reception <- only_users %>%
  select(day_created, reception) %>%
  arrange(day_created)

# The boxplot needs the day as a discrete grouping variable. A factor whose
# levels follow the sorted day values keeps the x-axis chronological; a plain
# as.character() conversion makes ggplot order the days as text
# ("10", "11", ..., "29", "8", "9").
# NOTE(review): assumes day_created holds numeric day-of-month values, as the
# scale_x_continuous() calls elsewhere in this file suggest.
only_users_reception$day_created <- factor(
  only_users_reception$day_created,
  levels = sort(unique(only_users_reception$day_created))
)
names(only_users_reception)[1] <- "group"
Reception Boxplot - User
# Boxplot of user reception scores, one box per day (`group`), with the
# legend hidden.
ggplot(
  only_users_reception,
  aes(x = group, y = reception, fill = group)
) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15)
  ) +
  labs(
    title = "User Reception Boxplot",
    subtitle = "November 8th to November 28th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Reception Score",
    x = "Date Collected"
  )

# Follower count recorded on each LIVGolf tweet, ordered by day.
LIV_followers_count <- LIVGolf %>%
  select(day_created, followers_count) %>%
  arrange(day_created)
LIV Golf Follower Plot
# Line chart of the LIVGolf follower count over the observation period.
LIV_followers_plot <- ggplot(
  LIV_followers_count,
  aes(x = day_created, y = followers_count)
) +
  geom_line() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15)
  ) +
  labs(
    title = "Number of Users Following LIVGolf",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Followers",
    x = "Date"
  )
# Draw one axis break for every day value from 8 to 29.
LIV_followers_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

Conclusions
LIVGolf Reception - LIVGolf experienced the highest level of
reception from its content from the 11th to 13th of November. Further
Analysis is required to understand potential reasons why content during
those dates was received differently than content on other days during
the collection period.
User Reception - November 11th and 13th experienced the highest
reception score of user generated content. From analysis in the boxplot,
it is evident that outliers impact the mean level of reception from user
generated tweets. The mean score is significantly lower if outliers were
to be removed. A small minority of content is driving reception scores
significantly higher on those dates.
Further analysis is required to determine if the content of the tweet
is responsible for the presence of the outliers in user generated
content. Further analysis to understand reception will isolate tweets
with significantly higher reception scores to understand the content in
the tweets.
5. Sentiment Analysis
The purpose of this section is to understand the sentiment of opinion
being used in tweets created by LIVGolf and by user generated content.
The goal is to identify main ideas or topics of conversation to further
gauge reception of the LIVGolf product. Additionally, by generating the
sentiment variable, the research can identify the overall tone of the
conversation as being either positive or negative.
# Copy of the data without media tweets (users and LIVGolf only).
# NOTE(review): `all_sentiment` is not referenced by the statements below,
# which summarise `final_data` (every source) - confirm which was intended.
all_sentiment <- subset(final_data, subset = source != "Media")

# Day and sentiment score of every tweet, for the sentiment-over-time view.
sentiment_data <- final_data %>%
  select(day_created, sentiment)

# Mean sentiment score for each day of the observation period.
daily_sentiment <- summarise(
  group_by(sentiment_data, day_created),
  sentiment = mean(sentiment)
)
daily_sentiment
Daily Sentiment Score - LIV Golf
# Lollipop chart of the mean daily sentiment held in `daily_sentiment`:
# a point at each daily mean plus a segment dropping to zero.
sentiment_plot <- ggplot(
  daily_sentiment,
  aes(x = day_created, y = sentiment)
) +
  geom_point(size = 1, color = "blue") +
  geom_segment(
    aes(x = day_created, xend = day_created, y = 0, yend = sentiment)
  ) +
  labs(
    title = "Mean Sentiment Score of all LIVGolf Tweets",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Sentiment Score",
    x = "Date of Tweet"
  )
# Draw one axis break for every day value from 8 to 29.
sentiment_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

# Day and sentiment score of every user-generated tweet.
only_users_sentiment <- only_users %>%
  select(day_created, sentiment)

# Mean user sentiment score for each day of the observation period.
user_sentiment_mean <- summarise(
  group_by(only_users_sentiment, day_created),
  sentiment = mean(sentiment)
)
user_sentiment_mean
Daily Sentiment Score - User
# Scatter plot of the sentiment score of every user-authored tweet by day.
only_users_sentiment_plot <- ggplot(
  only_users_sentiment,
  aes(x = day_created, y = sentiment)
) +
  geom_point(size = 1, color = "blue") +
  labs(
    title = "Sentiment Scores of User Tweets",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Sentiment Score",
    x = "Date of Tweet"
  )
# Draw one axis break for every day value from 8 to 29.
only_users_sentiment_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

Mean Sentiment Score - User
# Lollipop chart of the mean daily sentiment of user-authored tweets:
# a point at each daily mean plus a segment dropping to zero.
user_sentiment_mean_plot <- ggplot(
  user_sentiment_mean,
  aes(x = day_created, y = sentiment)
) +
  geom_point(size = 1, color = "blue") +
  geom_segment(
    aes(x = day_created, xend = day_created, y = 0, yend = sentiment)
  ) +
  labs(
    title = "Mean Daily Sentiment of User Tweets",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf",
    y = "Sentiment Score",
    x = "Date of Tweet"
  )
# Draw one axis break for every day value from 8 to 29.
user_sentiment_mean_plot + scale_x_continuous(breaks = seq(8, 29, by = 1))

# The per-day boxplot and violin chart need the day as a discrete grouping
# variable. A factor whose levels follow the sorted day values keeps the
# x-axis chronological; a plain as.character() conversion makes ggplot order
# the days as text ("10", "11", ..., "29", "8", "9").
# NOTE(review): assumes day_created holds numeric day-of-month values, as the
# scale_x_continuous() calls elsewhere in this file suggest.
only_users_sentiment$day_created <- factor(
  only_users_sentiment$day_created,
  levels = sort(unique(only_users_sentiment$day_created))
)
names(only_users_sentiment)[1] <- "group"

# Boxplot of user sentiment per day of collection.
only_users_sentiment %>%
  ggplot(aes(x = group, y = sentiment, fill = group)) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15)
  ) +
  labs(
    title = "User Sentiment Boxplot by Date",
    subtitle = "November 8th to November 28th 2022",
    caption = "Data Source: Twitter search for #LIVGolf"
  ) +
  labs(y = "Sentiment Score", x = "Date Collected")

# Violin chart of user sentiment per day of collection.
only_users_sentiment %>%
  ggplot(aes(x = group, y = sentiment, fill = group)) +
  geom_violin() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15)
  ) +
  labs(
    title = "User Sentiment Violin Chart by Date",
    subtitle = "November 8th to November 28th 2022",
    caption = "Data Source: Twitter search for #LIVGolf"
  ) +
  labs(y = "Sentiment Score", x = "Date Collected")

Conclusions
Sentiment of the entire dataset reveals that the 16th and 19th of
November resulted in the dates with the lowest sentiment values. Even
when the dataset of tweets was separated by author, the 16th and 19th
had the lowest sentiment scores.
The 24th of November resulted in the highest mean sentiment score for
all tweets and when analyzed by source. The violin plot and boxplot
illustrate a relatively tight clustering of sentiment scores without
any outliers.
The violin plot illustrates the range of sentiment scores and the
amount of occurrences where observations are clustered. For example, the
22nd of November experienced a large amount of content with sentiment
score close to zero, therefore, the violin is wider at that score. The
22nd also reveals height based on the presence of outliers that were
significantly far away from the mean. The fewer the amount of outliers,
and the wider the plot reveals a more consistent sentiment score for
that date in the observation period. The 8th, 9th, and 13th experienced
no outliers in sentiment.
6. Emotion Analysis
The purpose of this section is to understand the emotions conveyed
through language analysis of the text in observed content.
# Score the LIVGolf-authored tweets with get_nrc_sentiment(), then total each
# score column into a two-column frame: the column name (`sentiment`) and its
# sum (`Score`).
ew_sentiment_LIV <- get_nrc_sentiment(LIVGolf$text_clean)
sentimentscores_LIV <- colSums(ew_sentiment_LIV)
sentimentscores_LIV <- data.frame(
  sentiment = names(sentimentscores_LIV),
  Score = unname(sentimentscores_LIV)
)
Emotion Frequency Plot - LIV Golf
# Bar chart of the totals in `sentimentscores_LIV`, one bar per category,
# to show the general tone of LIVGolf's own content.
emotion_plot_LIV <- ggplot(
  data = sentimentscores_LIV,
  aes(x = sentiment, y = Score)
) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  theme(legend.position = "none") +
  labs(
    x = "Sentiments",
    y = "Word Count by Emotion",
    title = "Emotion Count of LIV Golf Content",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf"
  )
# Tilt the category labels so they do not overlap.
emotion_plot_LIV +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Score the user-generated tweets with get_nrc_sentiment(), then total each
# score column into a two-column frame: the column name (`sentiment`) and its
# sum (`Score`).
ew_sentiment_users <- get_nrc_sentiment(only_users$text_clean)
sentimentscores_users <- colSums(ew_sentiment_users)
sentimentscores_users <- data.frame(
  sentiment = names(sentimentscores_users),
  Score = unname(sentimentscores_users)
)
Emotion Frequency Plot - User Generated Content
# Bar chart of the totals in `sentimentscores_users`, one bar per category,
# to show the general tone of user-generated content about LIVGolf.
emotion_plot_users <- ggplot(
  data = sentimentscores_users,
  aes(x = sentiment, y = Score)
) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  theme(legend.position = "none") +
  labs(
    x = "Sentiments",
    y = "Word Count",
    title = "Emotion Count of User Generated Content",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf"
  )
# Tilt the category labels so they do not overlap.
emotion_plot_users +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

Conclusions
LIVGolf - As expected, LIVGolf emotion count is indicative of an
organization attempting to gather followers, spread awareness of their
brand, and is largely positive. The LIVGolf content is expressing
anticipation for future events with an overall positive emotion.
Users - When analyzing user generated content, the dominant emotion
is happiness but there is a significant increase in the negativity
conveyed in the tweet. Additionally, emotions like sadness, anger, and
fear also increased.
7. Text Analysis
The purpose of this section is to further understand the conversation
and topics discussed in tweets from LIVGolf and from users during the
collection period.
# Create a corpus from the LIVGolf-authored tweets.
text_LIV <- Corpus(VectorSource(LIVGolf$text_clean))

# Clean the text: drop numbers, punctuation and surplus whitespace, then
# lower-case it and remove English stop words.
text_LIV <- text_LIV %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)
text_LIV <- tm_map(text_LIV, content_transformer(tolower))
text_LIV <- tm_map(text_LIV, removeWords, stopwords("english"))

# Remove the league's own name and a recurring link token. The corpus is
# already lower-case at this point and removeWords() matches case-sensitively,
# so the terms must be lower-case too; "LIV" and "LIVGolf" never matched.
text_LIV <- tm_map(
  text_LIV,
  removeWords,
  c("liv", "livgolf", "httpstcoiflvyyalvx")
)

# Build a term-document matrix and total each term's count across documents
# into a word/frequency data frame, most frequent first.
dtm_LIV <- TermDocumentMatrix(text_LIV)
matrix_text_LIV <- as.matrix(dtm_LIV)
words_text_LIV <- sort(rowSums(matrix_text_LIV), decreasing = TRUE)
df_text_LIV <- data.frame(
  word = names(words_text_LIV),
  freq = words_text_LIV
)
Wordcloud - LIV Golf Authored Content
# Word cloud of the words used at least twice in LIVGolf-authored content
# (at most 200 words, most frequent placed first).
wordcloud(
  words = df_text_LIV$word,
  freq = df_text_LIV$freq,
  min.freq = 2,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Create a corpus from the user-generated tweets.
text_user <- Corpus(VectorSource(only_users$text_clean))

# Clean the text: drop numbers, punctuation and surplus whitespace, then
# lower-case it and remove English stop words.
text_user <- tm_map(text_user, removeNumbers)
text_user <- tm_map(text_user, removePunctuation)
text_user <- tm_map(text_user, stripWhitespace)
text_user <- tm_map(text_user, content_transformer(tolower))
text_user <- tm_map(text_user, removeWords, stopwords("english"))

# Remove the league's name and a few extra filler terms.
text_user <- tm_map(
  text_user,
  removeWords,
  c("liv", "livgolfinv", "livgolf", "'s", "said", "day", "amp", "can")
)

# Build a term-document matrix and total each term's count across documents
# into a word/frequency data frame, most frequent first.
dtm_user <- TermDocumentMatrix(text_user)
matrix_text_user <- as.matrix(dtm_user)
words_text_user <- sort(rowSums(matrix_text_user), decreasing = TRUE)
df_text_user <- data.frame(
  word = names(words_text_user),
  freq = words_text_user
)
Wordcloud - User Authored Content
# Word cloud of the words used at least 12 times in user-authored content
# (at most 200 words, most frequent placed first).
wordcloud(
  words = df_text_user$word,
  freq = df_text_user$freq,
  min.freq = 12,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Ten most frequent words across all cleaned tweets, after removing the
# tidytext stop words. The join key is named explicitly so the join does not
# depend on (or announce) whichever columns happen to match.
final_data %>%
  unnest_tokens(output = word, input = text_clean) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  slice(1:10)

# Split every cleaned tweet into two-word sequences (bigrams); rows with an
# NA bigram are dropped.
bigram_words <- final_data %>%
  unnest_tokens(
    input = text_clean,
    output = bigram,
    token = "ngrams",
    n = 2
  ) %>%
  filter(!is.na(bigram))

# Peek at the first ten bigrams.
bigram_words %>%
  select(bigram) %>%
  head(10)

# Separate each bigram into its two words.
bigram_words <- bigram_words %>%
  separate(col = bigram, into = c("word1", "word2"), sep = " ")

# Count how often each word pair occurs; the count becomes the edge `weight`
# of the bigram network. The columns are referenced bare (data masking)
# instead of via `bigram_words$...`, which produced columns literally named
# `bigram_words$word1` / `bigram_words$word2`.
bigram_count <- bigram_words %>%
  count(word1, word2, sort = TRUE) %>%
  rename(weight = n)
bigram_count %>% head()
# Rescale edge weights by dividing them by `lambda`.
#   x      - numeric weight(s) to rescale
#   lambda - divisor
# Returns x / lambda.
ScaleWeight <- function(x, lambda) {
  scaled <- x / lambda
  scaled
}

# Bigram count cut-off used when building the network below.
threshold <- 4
User Content Bigram
# Build an undirected graph from the bigrams that occur at least `threshold`
# times. The first two columns of `bigram_count` become the edge endpoints
# and `weight` an edge attribute.
#   - `>=` applies the value as a threshold; `==` kept only the bigrams whose
#     count was exactly equal to it and discarded the most frequent pairs.
#   - `weight = ...` rescales the weights; `weight == ...` only added a
#     logical comparison column and left the weights untouched.
network <- bigram_count %>%
  filter(weight >= threshold) %>%
  mutate(weight = ScaleWeight(x = weight, lambda = 2E3)) %>%
  graph_from_data_frame(directed = FALSE)

# Draw the network with small vertices and compact black labels.
plot(
  network,
  vertex.size = 1,
  vertex.label.color = 'black',
  vertex.label.cex = 0.5,
  vertex.label.dist = 1,
  edge.color = 'gray',
  main = 'Bigram Count Network',
  sub = glue('Weight Threshold: {threshold}'),
  alpha = 50)

Conclusions
Wordcloud LIVGOLF - Content generated by LIV Golf is largely
composed of words indicative of fostering excitement and attention to
the series.
Wordcloud Users - Users exhibit a greater variety of word usage and
emotions behind those words than LIV Golf. A high frequency of using PGA
Tour by users can be indicative of either support or resentment for the
competing league and may not necessarily advocate for either league.
Additionally, there are word choices by users that vary significantly
from LIV Golf, as LIV Golf has attempted to distance itself from the Saudi
Arabian connection. The users continue to talk about that topic.
Bigram Network - The Bigram network shows that the words used in
speaking about LIV Golf represent a very divided user base between
accepting LIV as a sporting league or highlighting its connection
to the government of Saudi Arabia.
8. Correlation
The purpose of this section is to understand if there is any
connection between how well a tweet is received and its overall
sentiment. This section should answer the question if the most negative
or most positive tweets are more or less likely to be the content that
is engaged with the most.
# Keep only the reception and sentiment columns of the full data set by
# dropping every other variable.
correlation_test <- final_data %>%
  select(-c(
    day_created, screen_name, location, followers_count, friends_count,
    listed_count, favourites_count, statuses_count, source, retweet_count,
    retweet_ratio, favorite_ratio, favorite_count, text_clean
  ))
# Relationship between reception and sentiment for the entire data set.
cor.matrix_a <- cor(correlation_test[1:2])
cor.matrix_a
## reception sentiment
## reception 1.00000000 0.08203386
## sentiment 0.08203386 1.00000000

# Same reduction for the LIVGolf-authored tweets.
correlation_test_LIV <- LIVGolf %>%
  select(-c(
    day_created, screen_name, location, followers_count, friends_count,
    listed_count, favourites_count, statuses_count, source, retweet_count,
    retweet_ratio, favorite_ratio, favorite_count, text_clean
  ))
# Relationship between reception and sentiment for the LIV Golf data frame.
cor.matrixb <- cor(correlation_test_LIV[1:2])
cor.matrixb
## reception sentiment
## reception 1.00000000 -0.04597686
## sentiment -0.04597686 1.00000000

# Same reduction for the user-generated tweets.
correlation_test_only_users <- only_users %>%
  select(-c(
    day_created, screen_name, location, followers_count, friends_count,
    listed_count, favourites_count, statuses_count, source, retweet_count,
    retweet_ratio, favorite_ratio, favorite_count, text_clean
  ))
# Relationship between reception and sentiment for the users data frame.
cor.matrixc <- cor(correlation_test_only_users[1:2])
cor.matrixc
## reception sentiment
## reception 1.0000000 0.0874266
## sentiment 0.0874266 1.0000000
9. Linear Regression
The purpose of this section is to understand future activity from
users on the platform by testing the connection between how frequently
LIV Golf authors content and measuring the impact that has on motivating
users to generate content.
I will create a model that will predict user activity based on the
amount of activity from LIV Golf.
# x = number of posts each day by LIV Golf
# y = number of user posts
# For every one LIV Golf post we should expect a response of an additional
# 1.031 user posts.
x <- c(1, 2, 1, 1, 1, 2, 3, 2, 2, 4, 2, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0)
y <- c(15, 14, 18, 28, 31, 26, 32, 24, 30, 26, 37, 25, 28, 30, 29, 26, 16,
       13, 19, 22, 23, 38)

# Fit user posts as a linear function of LIV Golf posts.
LIV_user_relationship <- lm(y ~ x)
print(LIV_user_relationship)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 23.687 1.031

# Correlation between the two daily counts.
data <- data.frame(y, x)
cor.matrixd <- cor(data[1:2])
cor.matrixd
## y x
## y 1.0000000 0.1450017
## x 0.1450017 1.0000000
summary(LIV_user_relationship)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.7500 -4.9687 0.2656 4.3047 14.3125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.687 2.510 9.436 0.00000000833 ***
## x 1.031 1.573 0.655 0.52
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.101 on 20 degrees of freedom
## Multiple R-squared: 0.02103, Adjusted R-squared: -0.02792
## F-statistic: 0.4295 on 1 and 20 DF, p-value: 0.5197

# Residual standard error relative to the mean number of user posts.
# The RSE is read from the fitted model (7.101 in the summary above) rather
# than typed in by hand, so it stays correct if x or y change.
RSE <- sigma(LIV_user_relationship)
error <- RSE / mean(y)
error
## [1] 0.28404

# Predicted number of user posts on a day with 10 LIV Golf posts.
a <- data.frame(x = 10)
result <- predict(LIV_user_relationship, a)
print(result)
## 1
## 34
Linear Regression Plot
# Scatter plot of daily user tweets against daily LIV Golf tweets, with the
# fitted linear trend and its confidence band drawn in red.
linear_plot <- ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "#69b3a2") +
  geom_smooth(method = lm, color = "red", se = TRUE) +
  theme(legend.position = "none") +
  labs(
    x = "Number of LIV Golf Tweets",
    y = "Number of User Tweets",
    title = "Linear Regression Model of LIVGolf and User Generated Content",
    subtitle = "November 8th to November 29th 2022",
    caption = "Data Source: Twitter search for #LIVGolf"
  )
linear_plot

Conclusion
If all other variables remain constant, the model estimates a small
positive connection between how many tweets LIV Golf creates and the
response from users on the platform, although the slope is not
statistically significant (p = 0.52, R-squared = 0.02). While this model
describes a relationship between users and LIVGolf twitter activity,
from a business perspective, it paints the picture of an organization
failing to motivate its followers to talk about their product and
attract additional followers. If LIVGolf created zero tweets, it would
be reasonable to expect 23 user generated tweets. However, as they
become more active on the platform, their efforts are not returned at a
very high rate, especially when considering the number of followers
they have. If LIVGolf generates 10 tweets, it is reasonable to expect 34
user tweets, which may not be worth their time. The model created during
this study has an average prediction error rate of 28%.
