Set working directory

Library neccessary

library(httr)
library(jsonlite)
library(SentimentAnalysis)
library(ggplot2)
library(tidyr)
library(scales)

Defining a function to fetch metadata

get_video_metadata <- function(api_key, video_id) {
  # Construct the URL
  url <- sprintf("https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id=%s&key=%s", video_id, api_key)
  
  # Fetch the response
  response <- GET(url)
  
  # Check for successful response
  if (http_type(response) != "application/json") {
    stop("API request failed. Please check my API key and video ID.")
  }
  
  # Convert raw content to text
  response_text <- rawToChar(response$content)
  
  # Parse JSON content
  video_data <- jsonlite::fromJSON(response_text)
  
  return(video_data)
}

Define key and API video

Fetch metadata

video_metadata <- get_video_metadata(api_key, video_id)

Extraction of relevant info

video_title <- video_metadata$items$snippet$title
view_count <- as.numeric(video_metadata$items$statistics$viewCount)
like_count <- as.numeric(video_metadata$items$statistics$likeCount)
comment_count <- as.numeric(video_metadata$items$statistics$commentCount)

# Check if dislike count is available and if it's 0
if ("dislikeCount" %in% names(video_metadata$items$statistics) && as.numeric(video_metadata$items$statistics$dislikeCount) == 0) {
  dislike_count <- 0
} else {
  dislike_count <- as.numeric(video_metadata$items$statistics$dislikeCount)
}

Printing extracted info

print(paste("Video Title:", video_title))
## [1] "Video Title: lofi hip hop radio - beats to relax/study to"
print(paste("View Count:", view_count))
## [1] "View Count: 668023703"
print(paste("Like Count:", like_count))
## [1] "Like Count: 7727952"
print(paste("Dislike Count:", dislike_count))
## [1] "Dislike Count: "
print(paste("Comment Count:", comment_count))
## [1] "Comment Count: 17949"

Perform sentiment analysis on all_comments

get_video_comments <- function(api_key, video_id) {
  url <- sprintf("https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=%s&key=%s", video_id, api_key)
  response <- httr::GET(url)
  data <- httr::content(response, "parsed")
  return(data)
}

video_comments <- get_video_comments(api_key, video_id)

comment_text <- lapply(video_comments$items, function(comment) {
  snippet <- comment$snippet
  snippet$topLevelComment$snippet$textDisplay
})

all_comments <- unlist(comment_text)

sentiments <- analyzeSentiment(all_comments)

print(sentiments)
##    WordCount SentimentGI NegativityGI PositivityGI SentimentHE NegativityHE
## 1          7  0.28571429   0.00000000    0.2857143  0.00000000            0
## 2          0         NaN          NaN          NaN         NaN          NaN
## 3          3  0.00000000   0.00000000    0.0000000  0.00000000            0
## 4          8  0.75000000   0.00000000    0.7500000  0.00000000            0
## 5          5  0.20000000   0.00000000    0.2000000  0.00000000            0
## 6          3  0.00000000   0.00000000    0.0000000  0.00000000            0
## 7          0         NaN          NaN          NaN         NaN          NaN
## 8          3  0.00000000   0.00000000    0.0000000  0.00000000            0
## 9          4  0.00000000   0.00000000    0.0000000  0.00000000            0
## 10         4  0.00000000   0.00000000    0.0000000  0.00000000            0
## 11        37  0.08108108   0.02702703    0.1081081  0.02702703            0
## 12         3  0.00000000   0.00000000    0.0000000  0.00000000            0
## 13         3  0.00000000   0.00000000    0.0000000  0.00000000            0
## 14         5  0.40000000   0.00000000    0.4000000  0.00000000            0
## 15         7  0.00000000   0.00000000    0.0000000  0.14285714            0
## 16         1  0.00000000   0.00000000    0.0000000  0.00000000            0
## 17         2  0.00000000   0.00000000    0.0000000  0.00000000            0
## 18         4  0.00000000   0.00000000    0.0000000  0.00000000            0
## 19         6  0.16666667   0.00000000    0.1666667  0.00000000            0
## 20         7  0.00000000   0.14285714    0.1428571  0.00000000            0
##    PositivityHE SentimentLM NegativityLM PositivityLM RatioUncertaintyLM
## 1    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 2           NaN         NaN          NaN          NaN                NaN
## 3    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 4    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 5    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 6    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 7           NaN         NaN          NaN          NaN                NaN
## 8    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 9    0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 10   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 11   0.02702703   0.0000000   0.02702703   0.02702703         0.08108108
## 12   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 13   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 14   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 15   0.14285714   0.1428571   0.00000000   0.14285714         0.00000000
## 16   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 17   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 18   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 19   0.00000000   0.0000000   0.00000000   0.00000000         0.00000000
## 20   0.00000000  -0.1428571   0.14285714   0.00000000         0.14285714
##    SentimentQDAP NegativityQDAP PositivityQDAP
## 1     0.14285714     0.00000000     0.14285714
## 2            NaN            NaN            NaN
## 3     0.00000000     0.00000000     0.00000000
## 4     0.50000000     0.00000000     0.50000000
## 5     0.20000000     0.00000000     0.20000000
## 6     0.00000000     0.00000000     0.00000000
## 7            NaN            NaN            NaN
## 8     0.00000000     0.00000000     0.00000000
## 9     0.00000000     0.00000000     0.00000000
## 10    0.00000000     0.00000000     0.00000000
## 11    0.02702703     0.02702703     0.05405405
## 12    0.00000000     0.00000000     0.00000000
## 13    0.00000000     0.00000000     0.00000000
## 14    0.40000000     0.00000000     0.40000000
## 15    0.14285714     0.00000000     0.14285714
## 16    0.00000000     0.00000000     0.00000000
## 17    0.00000000     0.00000000     0.00000000
## 18    0.00000000     0.00000000     0.00000000
## 19    0.00000000     0.00000000     0.00000000
## 20   -0.14285714     0.14285714     0.00000000

Plot sentiment scores obtained from different methods

sentiments_filtered <- na.omit(sentiments)


ggplot(sentiments_filtered, aes(x = SentimentGI, y = SentimentHE)) +
  geom_point(color = "skyblue", alpha = 0.7) +  
  geom_smooth(method = "lm", se = FALSE, color = "orange") +  
  labs(x = "Sentiment (General Inquirer)", y = "Sentiment (Harvard IV-4)") +
  theme_minimal() +  # Apply a minimal theme
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),  
    axis.text = element_text(size = 12),  
    axis.title = element_text(size = 14), 
    legend.position = "none"  
  ) +
  ggtitle("Comparison of Sentiment Analysis Methods") 
## `geom_smooth()` using formula = 'y ~ x'

Plot sentiment scores obtained from different methods

ggplot(sentiments_filtered, aes(x = SentimentGI, y = SentimentHE)) +
  geom_point(color = "skyblue", alpha = 0.7) + 
  geom_smooth(method = "lm", se = FALSE, color = "orange") + 
  labs(x = "Sentiment (General Inquirer)", y = "Sentiment (Harvard IV-4)") +
  theme_minimal() +  
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5), 
    axis.text = element_text(size = 12),  
    axis.title = element_text(size = 14), 
    legend.position = "none"  
  ) +
  ggtitle("Comparison of Sentiment Analysis Methods") + 
 
  geom_text(data = subset(sentiments_filtered, SentimentGI > 0.5 | SentimentHE > 0.5), 
            aes(label = WordCount), hjust = -0.2, vjust = 0.5, size = 3, color = "red") +
  
  geom_vline(xintercept = 0.5, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
  
  geom_text(x = 0.1, y = 0.8, label = "Trend line equation: y = 0.8x + 0.1", size = 5, color = "blue")
## `geom_smooth()` using formula = 'y ~ x'

# Extract relevant information
published_at <- as.POSIXct(video_metadata$items$snippet$publishedAt)
like_count <- as.numeric(video_metadata$items$statistics$likeCount)
comment_count <- as.numeric(video_metadata$items$statistics$commentCount)

Printing extracted info

print(paste("Published At:", published_at))
## [1] "Published At: 2022-07-10"
print(paste("Like Count:", like_count))
## [1] "Like Count: 7727952"
print(paste("Comment Count:", comment_count))
## [1] "Comment Count: 17949"

Engagement metrics over time

engagement_data <- data.frame(Date = published_at, Likes = like_count, Comments = comment_count)

ggplot(engagement_data, aes(x = Date)) +
  geom_line(aes(y = Likes, color = "Likes")) +
  geom_line(aes(y = Comments, color = "Comments")) +
  labs(title = "Engagement Metrics Over Time",
       x = "Date",
       y = "Count",
       color = "Metric") +
  theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Convert Date column to POSIXct format
engagement_data$Date <- as.POSIXct(engagement_data$Date)

# Split the data into multiple rows
engagement_data <- separate_rows(engagement_data, Comments, sep = ";")

# Convert Comments column to numeric
engagement_data$Comments <- as.numeric(engagement_data$Comments)

# Reshape the data into long format
engagement_data_long <- pivot_longer(engagement_data, cols = c(Likes, Comments), names_to = "Metric", values_to = "Count")

# Create the bar plot
ggplot(engagement_data_long, aes(x = Metric, y = Count, fill = Metric)) +
  geom_bar(stat = "identity") +
  labs(title = "Engagement Metrics",
       x = "Metric",
       y = "Count",
       fill = "Metric") +
  scale_fill_manual(values = c("Likes" = "blue", "Comments" = "green")) +
  scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M"))

# Convert Date column to POSIXct format
engagement_data$Date <- as.POSIXct(engagement_data$Date)

# Split the data into multiple rows
engagement_data <- separate_rows(engagement_data, Comments, sep = ";")

# Convert Comments column to numeric
engagement_data$Comments <- as.numeric(engagement_data$Comments)

# Create a data frame with the provided information
engagement_info <- data.frame(
  Metric = c("Like Count", "Comment Count"),
  Count = c(engagement_data$Likes[1], engagement_data$Comments[1])
)

# Create a bar plot
ggplot(engagement_info, aes(x = Metric, y = Count, fill = Metric)) +
  geom_bar(stat = "identity") +
  labs(title = "Engagement Metrics",
       x = "Metric",
       y = "Count",
       fill = "Metric") +
  scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M"))

# Access the items list
comment_items <- video_comments$items

# Check the structure of the first item
str(comment_items[[1]])
## List of 4
##  $ kind   : chr "youtube#commentThread"
##  $ etag   : chr "sMEtwYMa3Y7QlLbCzVObGBvRoDk"
##  $ id     : chr "Ugx-gughDP5ivWRJfJV4AaABAg"
##  $ snippet:List of 6
##   ..$ channelId      : chr "UCSJ4gkVC6NrvII8umztf0Ow"
##   ..$ videoId        : chr "5qap5aO4i9A"
##   ..$ topLevelComment:List of 4
##   .. ..$ kind   : chr "youtube#comment"
##   .. ..$ etag   : chr "HpATRLM1ohf8-4EYTtr4k4vKt70"
##   .. ..$ id     : chr "Ugx-gughDP5ivWRJfJV4AaABAg"
##   .. ..$ snippet:List of 13
##   .. .. ..$ channelId            : chr "UCSJ4gkVC6NrvII8umztf0Ow"
##   .. .. ..$ videoId              : chr "5qap5aO4i9A"
##   .. .. ..$ textDisplay          : chr "The livestream is back online! Come and say hi <a href=\"UCSJ4gkVC6NrvII8umztf0Ow/XCWUYd65DOm6x_AP0_iNgAk\"></a"| __truncated__
##   .. .. ..$ textOriginal         : chr "The livestream is back online! Come and say hi \n→ https://www.youtube.com/watch?v=jfKfPfyJRdk&ab_channel=LofiGirl"
##   .. .. ..$ authorDisplayName    : chr "@LofiGirl"
##   .. .. ..$ authorProfileImageUrl: chr "https://yt3.ggpht.com/M0eY1tfgiwuyqrSlWIkzf5-6RZSARiuChjpXyZe-hfl9C2fn-I4leLtxKAqYqGZv_FgE4u5TKQ=s48-c-k-c0x00ffffff-no-rj"
##   .. .. ..$ authorChannelUrl     : chr "http://www.youtube.com/@LofiGirl"
##   .. .. ..$ authorChannelId      :List of 1
##   .. .. .. ..$ value: chr "UCSJ4gkVC6NrvII8umztf0Ow"
##   .. .. ..$ canRate              : logi TRUE
##   .. .. ..$ viewerRating         : chr "none"
##   .. .. ..$ likeCount            : int 29982
##   .. .. ..$ publishedAt          : chr "2022-07-10T16:44:02Z"
##   .. .. ..$ updatedAt            : chr "2022-07-12T15:55:49Z"
##   ..$ canReply       : logi TRUE
##   ..$ totalReplyCount: int 490
##   ..$ isPublic       : logi TRUE
# Explore the content of the first item
comment_items[[1]]
## $kind
## [1] "youtube#commentThread"
## 
## $etag
## [1] "sMEtwYMa3Y7QlLbCzVObGBvRoDk"
## 
## $id
## [1] "Ugx-gughDP5ivWRJfJV4AaABAg"
## 
## $snippet
## $snippet$channelId
## [1] "UCSJ4gkVC6NrvII8umztf0Ow"
## 
## $snippet$videoId
## [1] "5qap5aO4i9A"
## 
## $snippet$topLevelComment
## $snippet$topLevelComment$kind
## [1] "youtube#comment"
## 
## $snippet$topLevelComment$etag
## [1] "HpATRLM1ohf8-4EYTtr4k4vKt70"
## 
## $snippet$topLevelComment$id
## [1] "Ugx-gughDP5ivWRJfJV4AaABAg"
## 
## $snippet$topLevelComment$snippet
## $snippet$topLevelComment$snippet$channelId
## [1] "UCSJ4gkVC6NrvII8umztf0Ow"
## 
## $snippet$topLevelComment$snippet$videoId
## [1] "5qap5aO4i9A"
## 
## $snippet$topLevelComment$snippet$textDisplay
## [1] "The livestream is back online! Come and say hi <a href=\"UCSJ4gkVC6NrvII8umztf0Ow/XCWUYd65DOm6x_AP0_iNgAk\"></a><br>→ <a href=\"https://www.youtube.com/watch?v=jfKfPfyJRdk\">https://www.youtube.com/watch?v=jfKfPfyJRdk&amp;ab_channel=LofiGirl</a>"
## 
## $snippet$topLevelComment$snippet$textOriginal
## [1] "The livestream is back online! Come and say hi \n→ https://www.youtube.com/watch?v=jfKfPfyJRdk&ab_channel=LofiGirl"
## 
## $snippet$topLevelComment$snippet$authorDisplayName
## [1] "@LofiGirl"
## 
## $snippet$topLevelComment$snippet$authorProfileImageUrl
## [1] "https://yt3.ggpht.com/M0eY1tfgiwuyqrSlWIkzf5-6RZSARiuChjpXyZe-hfl9C2fn-I4leLtxKAqYqGZv_FgE4u5TKQ=s48-c-k-c0x00ffffff-no-rj"
## 
## $snippet$topLevelComment$snippet$authorChannelUrl
## [1] "http://www.youtube.com/@LofiGirl"
## 
## $snippet$topLevelComment$snippet$authorChannelId
## $snippet$topLevelComment$snippet$authorChannelId$value
## [1] "UCSJ4gkVC6NrvII8umztf0Ow"
## 
## 
## $snippet$topLevelComment$snippet$canRate
## [1] TRUE
## 
## $snippet$topLevelComment$snippet$viewerRating
## [1] "none"
## 
## $snippet$topLevelComment$snippet$likeCount
## [1] 29982
## 
## $snippet$topLevelComment$snippet$publishedAt
## [1] "2022-07-10T16:44:02Z"
## 
## $snippet$topLevelComment$snippet$updatedAt
## [1] "2022-07-12T15:55:49Z"
## 
## 
## 
## $snippet$canReply
## [1] TRUE
## 
## $snippet$totalReplyCount
## [1] 490
## 
## $snippet$isPublic
## [1] TRUE
# Extract relevant information from the snippet section
comment_text <- video_comments$snippet$topLevelComment$snippet$textDisplay
author_name <- video_comments$snippet$topLevelComment$snippet$authorDisplayName
like_count <- video_comments$snippet$topLevelComment$snippet$likeCount
published_at <- video_comments$snippet$topLevelComment$snippet$publishedAt

# Create a dataframe to store the extracted information
comment_data <- data.frame(
  Comment = comment_text,
  Author = author_name,
  Likes = like_count,
  PublishedAt = as.POSIXct(published_at)
)

# Print the extracted data
print(comment_data)
## [1] PublishedAt
## <0 rows> (or 0-length row.names)
# Assuming video_metadata is the object containing the video metadata
# Accessing the 'items' data frame within the video_metadata object
video_items <- video_metadata$items

# Assuming there is only one row in the 'items' data frame
# Accessing the 'snippet' list within the first row of the video_items data frame
snippet <- video_items$snippet

# Accessing the 'topLevelComment' list within the 'snippet' list
top_level_comment <- snippet$topLevelComment

# Accessing the 'textDisplay' field within the 'snippet' list to get the comment text
comment_text <- top_level_comment$snippet$textDisplay

# Printing the comment text
print(comment_text)
## NULL

View data structure

str(engagement_data)
## tibble [1 Ɨ 3] (S3: tbl_df/tbl/data.frame)
##  $ Date    : POSIXct[1:1], format: "2022-07-10"
##  $ Likes   : num 7727952
##  $ Comments: num 17949