# Libraries
library(vosonSML)
library(magrittr)
# Guide to getting an API key: a Google developer API key requires only a
# Google account. For a YouTube Data API key, follow the guide at
# https://developers.google.com/youtube/v3/getting-started
# YouTube data
# Resolve the two video URLs to the video ids passed to Collect().
Video_Ids <- GetYoutubeVideoIDs(c(
  "https://www.youtube.com/watch?v=4jRBRDbJemM",
  "https://www.youtube.com/watch?v=HMOI_lkzW08"
))
## Extracted 2 video ids.

# `api_key` is never assigned in this script. Keep any value already defined
# in the session; otherwise fall back to the YOUTUBE_API_KEY environment
# variable so the key does not have to be hard-coded in the source.
if (!exists("api_key")) {
  api_key <- Sys.getenv("YOUTUBE_API_KEY")
}
if (!is.character(api_key) || length(api_key) != 1L ||
      is.na(api_key) || !nzchar(api_key)) {
  stop(
    "No YouTube API key found: define `api_key` or set YOUTUBE_API_KEY.",
    call. = FALSE
  )
}

# Authenticate against the YouTube API and collect the comments for the
# videos above (maxComments = 600), keeping the result in memory only
# (writeToFile = FALSE).
youtubeData <- Authenticate("youtube", apiKey = api_key) %>%
  Collect(videoIDs = Video_Ids, writeToFile = FALSE, maxComments = 600)
## Collecting comment threads for youtube videos...
## Video 1 of 2
## ---------------------------------------------------------------
## ** video Id: 4jRBRDbJemM
## ** Collected threads: 137
## ** Collecting replies for 117 threads with replies. Please be patient.
## .....................................................................................................................
## ** Collected replies: 135
## ** Total video comments: 272
## ---------------------------------------------------------------
## Video 2 of 2
## ---------------------------------------------------------------
## ** video Id: HMOI_lkzW08
## ** Collected threads: 110
## ** Collecting replies for 56 threads with replies. Please be patient.
## ........................................................
## ** Collected replies: 66
## ** Total video comments: 176
## ---------------------------------------------------------------
## ** Total comments collected for all videos 448.
## (Estimated API unit cost: 358)
## Done.
## Elapsed time: 0 hrs 1 mins 0 secs (59.9)
# Build the "activity" network from the collected comments, then attach the
# comment text from the same data to that network.
activityNetwork <- AddText(Create(youtubeData, "activity"), youtubeData)
## Generating youtube activity network...
## -------------------------
## collected youtube comments | 448
## top-level comments | 247
## reply comments | 201
## videos | 2
## network nodes | 450
## network edges | 448
## -------------------------
## Done.
## Adding text to network...Done.
# Turn the activity network into a graph object.
activityGraph <- Graph(activityNetwork)
## Creating igraph network graph...Done.
# Inspect the structure of the collected comment data.
str(object = youtubeData)
## Classes 'datasource', 'youtube' and 'data.frame': 448 obs. of 12 variables:
## $ Comment : chr "NOTE: This revised video corrects three minor errors (that, fortunately, did not effect the main idea) found by"| __truncated__ "great!" "This is the clearest explanation of the ROC curve I've yet seen. Kudos to you for developing this!" "This is absolutely amazing... Thank you!" ...
## $ AuthorDisplayName : chr "StatQuest with Josh Starmer" "Jorge Arroyo" "Paul Simpson" "SF001" ...
## $ AuthorProfileImageUrl: chr "https://yt3.ggpht.com/a/AGF-l78PbYk1S1sUE2zdOxRJYCG80XQmHOVEqc2aIA=s48-c-k-c0xffffffff-no-rj-mo" "https://yt3.ggpht.com/a/AGF-l79zkV58_oalKBKzlnPFYYpXDapAShM7ShkHTg=s48-c-k-c0xffffffff-no-rj-mo" "https://yt3.ggpht.com/a/AGF-l7_nggqEbD2C71evOuxu44heYIZvcJILm53hLA=s48-c-k-c0xffffffff-no-rj-mo" "https://yt3.ggpht.com/a/AGF-l7_2y7fGGqjwDOwg15VBSQfhf3iPc6pyTlNOoQ=s48-c-k-c0xffffffff-no-rj-mo" ...
## $ AuthorChannelUrl : chr "http://www.youtube.com/channel/UCtYLUTtgS3k1Fg4y5tAhLbw" "http://www.youtube.com/channel/UCZUMBu9EcM1CbNl8s8YIiKA" "http://www.youtube.com/channel/UCeXiR16iXfQmt1N10kBQPZw" "http://www.youtube.com/channel/UCJ1cbqAvOTMHhTBzPp1AUrQ" ...
## $ AuthorChannelID : chr "UCtYLUTtgS3k1Fg4y5tAhLbw" "UCZUMBu9EcM1CbNl8s8YIiKA" "UCeXiR16iXfQmt1N10kBQPZw" "UCJ1cbqAvOTMHhTBzPp1AUrQ" ...
## $ ReplyCount : chr "4" "1" "1" "1" ...
## $ LikeCount : chr "33" "2" "1" "2" ...
## $ PublishedAt : chr "2019-07-11T21:17:16.000Z" "2020-01-21T16:23:57.000Z" "2020-01-21T16:16:03.000Z" "2020-01-21T12:40:37.000Z" ...
## $ UpdatedAt : chr "2019-07-11T21:18:23.000Z" "2020-01-21T16:23:57.000Z" "2020-01-21T16:16:03.000Z" "2020-01-21T12:40:37.000Z" ...
## $ CommentID : chr "Ugx1fVKgJSqVWQB63O14AaABAg" "UgypfctlDR8XTEfzzVd4AaABAg" "UgyVf9y3QkcF8dlNait4AaABAg" "Ugyh-fR8GMQsY9otc7h4AaABAg" ...
## $ ParentID : chr NA NA NA NA ...
## $ VideoID : chr "4jRBRDbJemM" "4jRBRDbJemM" "4jRBRDbJemM" "4jRBRDbJemM" ...
# Persist the collected comments to CSV and load them back for the sentiment
# analysis. The path is defined once so the write and the read cannot drift.
csv_path <- "C:/Users/WASIM/Documents/youtubeData.csv"
# Spell out TRUE/FALSE: the shorthands T and F are ordinary variables that
# can be reassigned.
write.csv(youtubeData, file = csv_path, row.names = FALSE)
library(syuzhet)
# Read back the file just written instead of prompting with file.choose(),
# so the script also runs unattended and always analyses the same data.
y_data <- read.csv(csv_path, header = TRUE)
str(y_data)
## 'data.frame': 448 obs. of 12 variables:
## $ Comment : Factor w/ 359 levels ":)","@3:33 The way the matrix is arranged is transposing necessary?",..: 210 99 316 313 24 168 124 277 214 139 ...
## $ AuthorDisplayName : Factor w/ 243 levels "<U+041C><U+0430><U+043A><U+0441><U+0438><U+043C> <U+0410><U+043D><U+0434><U+0440><U+0435><U+0435><U+0432><U+0438><U+0447>",..: 209 101 165 197 93 195 26 95 1 239 ...
## $ AuthorProfileImageUrl: Factor w/ 243 levels "https://yt3.ggpht.com/a/AGF-l7--4K4Y9qWotIVsdurIKCjlZAcR2OMDPgDb8Q=s48-c-k-c0xffffffff-no-rj-mo",..: 169 242 101 70 105 26 165 115 98 83 ...
## $ AuthorChannelUrl : Factor w/ 243 levels "http://www.youtube.com/channel/UC-HDZnn5EpM9hj32PwIrhKQ",..: 187 242 85 121 204 175 217 155 66 232 ...
## $ AuthorChannelID : Factor w/ 243 levels "UC-HDZnn5EpM9hj32PwIrhKQ",..: 187 242 85 121 204 175 217 155 66 232 ...
## $ ReplyCount : int 4 1 1 1 1 1 1 1 1 1 ...
## $ LikeCount : int 33 2 1 2 4 1 1 1 1 1 ...
## $ PublishedAt : Factor w/ 448 levels "2017-12-04T21:09:22.000Z",..: 116 444 443 441 439 437 435 433 430 429 ...
## $ UpdatedAt : Factor w/ 448 levels "2017-12-04T21:09:22.000Z",..: 116 444 443 441 439 437 435 433 430 429 ...
## $ CommentID : Factor w/ 448 levels "Ugw-A0FfZ46CUwKa2qd4AaABAg",..: 124 311 335 281 412 273 67 44 426 61 ...
## $ ParentID : Factor w/ 173 levels "Ugw-jysleACjECiYHSd4AaABAg",..: NA NA NA NA NA NA NA NA NA NA ...
## $ VideoID : Factor w/ 2 levels "4jRBRDbJemM",..: 1 1 1 1 1 1 1 1 1 1 ...
# Sentiment
# Convert the comment column to UTF-8, compute the NRC sentiment scores for
# every comment, and preview the first rows of the result.
comments <- iconv(y_data[["Comment"]], to = "utf-8")
sentiment_score <- get_nrc_sentiment(comments)
head(sentiment_score, n = 6L)
## anger anticipation disgust fear joy sadness surprise trust negative positive
## 1 0 0 0 1 1 0 0 2 0 3
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 1 0 0 0 0 1
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 1 0
## 6 0 0 0 0 0 0 0 0 0 0
# Neutral
# Flag a comment as neutral (1) when its positive and negative scores sum to
# zero, and as 0 otherwise.
sentiment_score[["neutral"]] <- with(
  sentiment_score,
  ifelse((positive + negative) == 0, 1, 0)
)
head(sentiment_score, n = 6L)
## anger anticipation disgust fear joy sadness surprise trust negative positive
## 1 0 0 0 1 1 0 0 2 0 3
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 1 0 0 0 0 1
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 1 0
## 6 0 0 0 0 0 0 0 0 0 0
## neutral
## 1 0
## 2 1
## 3 0
## 4 1
## 5 0
## 6 1
# Barplot
# Plot each column total of sentiment_score as a percentage of the overall
# total, with axis labels drawn perpendicular to the axes (las = 2).
barplot(
  100 * colSums(sentiment_score) / sum(sentiment_score),
  las = 2,
  # One colour per column: a fixed rainbow(10) is recycled once the
  # "neutral" column brings the count to 11, so two bars shared a colour.
  col = rainbow(ncol(sentiment_score)),
  ylab = "Percentage",
  xlab = "",
  main = "Sentiment Score for comments-Double BAM"
)