Set working directory
Library neccessary
library(httr)
library(jsonlite)
library(SentimentAnalysis)
library(ggplot2)
library(tidyr)
library(scales)
Define key and API video
Plot sentiment scores obtained from different methods
sentiments_filtered <- na.omit(sentiments)
ggplot(sentiments_filtered, aes(x = SentimentGI, y = SentimentHE)) +
geom_point(color = "skyblue", alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE, color = "orange") +
labs(x = "Sentiment (General Inquirer)", y = "Sentiment (Harvard IV-4)") +
theme_minimal() + # Apply a minimal theme
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.position = "none"
) +
ggtitle("Comparison of Sentiment Analysis Methods")
## `geom_smooth()` using formula = 'y ~ x'

Plot sentiment scores obtained from different methods
ggplot(sentiments_filtered, aes(x = SentimentGI, y = SentimentHE)) +
geom_point(color = "skyblue", alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE, color = "orange") +
labs(x = "Sentiment (General Inquirer)", y = "Sentiment (Harvard IV-4)") +
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.position = "none"
) +
ggtitle("Comparison of Sentiment Analysis Methods") +
geom_text(data = subset(sentiments_filtered, SentimentGI > 0.5 | SentimentHE > 0.5),
aes(label = WordCount), hjust = -0.2, vjust = 0.5, size = 3, color = "red") +
geom_vline(xintercept = 0.5, linetype = "dashed", color = "red") +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
geom_text(x = 0.1, y = 0.8, label = "Trend line equation: y = 0.8x + 0.1", size = 5, color = "blue")
## `geom_smooth()` using formula = 'y ~ x'

# Extract relevant information
published_at <- as.POSIXct(video_metadata$items$snippet$publishedAt)
like_count <- as.numeric(video_metadata$items$statistics$likeCount)
comment_count <- as.numeric(video_metadata$items$statistics$commentCount)
Engagement metrics over time
engagement_data <- data.frame(Date = published_at, Likes = like_count, Comments = comment_count)
ggplot(engagement_data, aes(x = Date)) +
geom_line(aes(y = Likes, color = "Likes")) +
geom_line(aes(y = Comments, color = "Comments")) +
labs(title = "Engagement Metrics Over Time",
x = "Date",
y = "Count",
color = "Metric") +
theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ā¹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ā¹ Do you need to adjust the group aesthetic?

# Convert Date column to POSIXct format
engagement_data$Date <- as.POSIXct(engagement_data$Date)
# Split the data into multiple rows
engagement_data <- separate_rows(engagement_data, Comments, sep = ";")
# Convert Comments column to numeric
engagement_data$Comments <- as.numeric(engagement_data$Comments)
# Reshape the data into long format
engagement_data_long <- pivot_longer(engagement_data, cols = c(Likes, Comments), names_to = "Metric", values_to = "Count")
# Create the bar plot
ggplot(engagement_data_long, aes(x = Metric, y = Count, fill = Metric)) +
geom_bar(stat = "identity") +
labs(title = "Engagement Metrics",
x = "Metric",
y = "Count",
fill = "Metric") +
scale_fill_manual(values = c("Likes" = "blue", "Comments" = "green")) +
scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M"))

# Convert Date column to POSIXct format
engagement_data$Date <- as.POSIXct(engagement_data$Date)
# Split the data into multiple rows
engagement_data <- separate_rows(engagement_data, Comments, sep = ";")
# Convert Comments column to numeric
engagement_data$Comments <- as.numeric(engagement_data$Comments)
# Create a data frame with the provided information
engagement_info <- data.frame(
Metric = c("Like Count", "Comment Count"),
Count = c(engagement_data$Likes[1], engagement_data$Comments[1])
)
# Create a bar plot
ggplot(engagement_info, aes(x = Metric, y = Count, fill = Metric)) +
geom_bar(stat = "identity") +
labs(title = "Engagement Metrics",
x = "Metric",
y = "Count",
fill = "Metric") +
scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M"))

# Access the items list
comment_items <- video_comments$items
# Check the structure of the first item
str(comment_items[[1]])
## List of 4
## $ kind : chr "youtube#commentThread"
## $ etag : chr "sMEtwYMa3Y7QlLbCzVObGBvRoDk"
## $ id : chr "Ugx-gughDP5ivWRJfJV4AaABAg"
## $ snippet:List of 6
## ..$ channelId : chr "UCSJ4gkVC6NrvII8umztf0Ow"
## ..$ videoId : chr "5qap5aO4i9A"
## ..$ topLevelComment:List of 4
## .. ..$ kind : chr "youtube#comment"
## .. ..$ etag : chr "HpATRLM1ohf8-4EYTtr4k4vKt70"
## .. ..$ id : chr "Ugx-gughDP5ivWRJfJV4AaABAg"
## .. ..$ snippet:List of 13
## .. .. ..$ channelId : chr "UCSJ4gkVC6NrvII8umztf0Ow"
## .. .. ..$ videoId : chr "5qap5aO4i9A"
## .. .. ..$ textDisplay : chr "The livestream is back online! Come and say hi <a href=\"UCSJ4gkVC6NrvII8umztf0Ow/XCWUYd65DOm6x_AP0_iNgAk\"></a"| __truncated__
## .. .. ..$ textOriginal : chr "The livestream is back online! Come and say hi \nā https://www.youtube.com/watch?v=jfKfPfyJRdk&ab_channel=LofiGirl"
## .. .. ..$ authorDisplayName : chr "@LofiGirl"
## .. .. ..$ authorProfileImageUrl: chr "https://yt3.ggpht.com/M0eY1tfgiwuyqrSlWIkzf5-6RZSARiuChjpXyZe-hfl9C2fn-I4leLtxKAqYqGZv_FgE4u5TKQ=s48-c-k-c0x00ffffff-no-rj"
## .. .. ..$ authorChannelUrl : chr "http://www.youtube.com/@LofiGirl"
## .. .. ..$ authorChannelId :List of 1
## .. .. .. ..$ value: chr "UCSJ4gkVC6NrvII8umztf0Ow"
## .. .. ..$ canRate : logi TRUE
## .. .. ..$ viewerRating : chr "none"
## .. .. ..$ likeCount : int 29982
## .. .. ..$ publishedAt : chr "2022-07-10T16:44:02Z"
## .. .. ..$ updatedAt : chr "2022-07-12T15:55:49Z"
## ..$ canReply : logi TRUE
## ..$ totalReplyCount: int 490
## ..$ isPublic : logi TRUE
# Explore the content of the first item
comment_items[[1]]
## $kind
## [1] "youtube#commentThread"
##
## $etag
## [1] "sMEtwYMa3Y7QlLbCzVObGBvRoDk"
##
## $id
## [1] "Ugx-gughDP5ivWRJfJV4AaABAg"
##
## $snippet
## $snippet$channelId
## [1] "UCSJ4gkVC6NrvII8umztf0Ow"
##
## $snippet$videoId
## [1] "5qap5aO4i9A"
##
## $snippet$topLevelComment
## $snippet$topLevelComment$kind
## [1] "youtube#comment"
##
## $snippet$topLevelComment$etag
## [1] "HpATRLM1ohf8-4EYTtr4k4vKt70"
##
## $snippet$topLevelComment$id
## [1] "Ugx-gughDP5ivWRJfJV4AaABAg"
##
## $snippet$topLevelComment$snippet
## $snippet$topLevelComment$snippet$channelId
## [1] "UCSJ4gkVC6NrvII8umztf0Ow"
##
## $snippet$topLevelComment$snippet$videoId
## [1] "5qap5aO4i9A"
##
## $snippet$topLevelComment$snippet$textDisplay
## [1] "The livestream is back online! Come and say hi <a href=\"UCSJ4gkVC6NrvII8umztf0Ow/XCWUYd65DOm6x_AP0_iNgAk\"></a><br>ā <a href=\"https://www.youtube.com/watch?v=jfKfPfyJRdk\">https://www.youtube.com/watch?v=jfKfPfyJRdk&ab_channel=LofiGirl</a>"
##
## $snippet$topLevelComment$snippet$textOriginal
## [1] "The livestream is back online! Come and say hi \nā https://www.youtube.com/watch?v=jfKfPfyJRdk&ab_channel=LofiGirl"
##
## $snippet$topLevelComment$snippet$authorDisplayName
## [1] "@LofiGirl"
##
## $snippet$topLevelComment$snippet$authorProfileImageUrl
## [1] "https://yt3.ggpht.com/M0eY1tfgiwuyqrSlWIkzf5-6RZSARiuChjpXyZe-hfl9C2fn-I4leLtxKAqYqGZv_FgE4u5TKQ=s48-c-k-c0x00ffffff-no-rj"
##
## $snippet$topLevelComment$snippet$authorChannelUrl
## [1] "http://www.youtube.com/@LofiGirl"
##
## $snippet$topLevelComment$snippet$authorChannelId
## $snippet$topLevelComment$snippet$authorChannelId$value
## [1] "UCSJ4gkVC6NrvII8umztf0Ow"
##
##
## $snippet$topLevelComment$snippet$canRate
## [1] TRUE
##
## $snippet$topLevelComment$snippet$viewerRating
## [1] "none"
##
## $snippet$topLevelComment$snippet$likeCount
## [1] 29982
##
## $snippet$topLevelComment$snippet$publishedAt
## [1] "2022-07-10T16:44:02Z"
##
## $snippet$topLevelComment$snippet$updatedAt
## [1] "2022-07-12T15:55:49Z"
##
##
##
## $snippet$canReply
## [1] TRUE
##
## $snippet$totalReplyCount
## [1] 490
##
## $snippet$isPublic
## [1] TRUE
# Extract relevant information from the snippet section
comment_text <- video_comments$snippet$topLevelComment$snippet$textDisplay
author_name <- video_comments$snippet$topLevelComment$snippet$authorDisplayName
like_count <- video_comments$snippet$topLevelComment$snippet$likeCount
published_at <- video_comments$snippet$topLevelComment$snippet$publishedAt
# Create a dataframe to store the extracted information
comment_data <- data.frame(
Comment = comment_text,
Author = author_name,
Likes = like_count,
PublishedAt = as.POSIXct(published_at)
)
# Print the extracted data
print(comment_data)
## [1] PublishedAt
## <0 rows> (or 0-length row.names)
# Assuming video_metadata is the object containing the video metadata
# Accessing the 'items' data frame within the video_metadata object
video_items <- video_metadata$items
# Assuming there is only one row in the 'items' data frame
# Accessing the 'snippet' list within the first row of the video_items data frame
snippet <- video_items$snippet
# Accessing the 'topLevelComment' list within the 'snippet' list
top_level_comment <- snippet$topLevelComment
# Accessing the 'textDisplay' field within the 'snippet' list to get the comment text
comment_text <- top_level_comment$snippet$textDisplay
# Printing the comment text
print(comment_text)
## NULL
View data structure
str(engagement_data)
## tibble [1 Ć 3] (S3: tbl_df/tbl/data.frame)
## $ Date : POSIXct[1:1], format: "2022-07-10"
## $ Likes : num 7727952
## $ Comments: num 17949
- At this point, i would stop the analysis because this dataframe
contains only one observation. My next study will have more observations
for further analysis.