library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(glue)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggpubr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##Load Data
vids <- read.csv("data_input/youtubetrends_2023.csv", stringsAsFactors = TRUE )
head(vids)
## trending_date
## 1 2023-01-01
## 2 2023-01-01
## 3 2023-01-01
## 4 2023-01-01
## 5 2023-01-01
## 6 2023-01-01
## title
## 1 Peach Bowl: Ohio State Buckeyes vs. Georgia Bulldogs | Full Game Highlights
## 2 If You Build It, I'll Pay For It!
## 3 Fiesta Bowl: TCU Horned Frogs vs. Michigan Wolverines | College Football Playoff
## 4 Dude Perfect vs. Luka Doncic (1-on-1)
## 5 Times Square 2023 Ball Drop in New York City: full video
## 6 New Year's 2023: Dubai puts on thrilling fireworks show at Burj Khalifa
## channel_title category_id publish_time views likes
## 1 ESPN College Football Sports 2023-01-01 05:12:55 618494 7884
## 2 MrBeast Gaming Gaming 2022-12-31 20:00:04 3621512 238494
## 3 ESPN College Football Sports 2023-01-01 01:11:58 500700 4915
## 4 Dude Perfect Sports 2022-12-31 14:59:56 2811640 148481
## 5 News 19 WLTX News and Politics 2023-01-01 05:21:44 358177 2963
## 6 Global News News and Politics 2022-12-31 20:51:09 2088464 18234
## dislikes comment_count comments_disabled ratings_disabled
## 1 0 2413 FALSE FALSE
## 2 0 12935 FALSE FALSE
## 3 0 2355 FALSE FALSE
## 4 0 4543 FALSE FALSE
## 5 0 654 FALSE FALSE
## 6 0 1508 FALSE FALSE
## video_error_or_removed publish_hour publish_when publish_wday timetotrend
## 1 FALSE 5 12am to 7am Sunday 1
## 2 FALSE 20 4pm to 12am Saturday 2
## 3 FALSE 1 12am to 7am Sunday 1
## 4 FALSE 14 8am to 3pm Saturday 2
## 5 FALSE 5 12am to 7am Sunday 1
## 6 FALSE 20 4pm to 12am Saturday 2
Deskripsi kolom: - trending_date : tanggal trending
(format: YY.MM.DD) - title : judul video -
channel_title : nama channel Youtube -
category_id : kategori video - publish_time :
tanggal upload video (format: YYYY-MM-DD-HH-MM-SS) - views
: jumlah views dalam video tersebut - likes : jumlah likes
dalam video tersebut - dislikes : jumlah dislikes dalam
video tersebut - comment_count : jumlah komentar -
comment_disabled: apakah kolom komentar tidak diaktifkan -
rating_disabled : apakah rating video tidak diaktifkan -
video_error_or_removed: apakah video dihapus -
publish_hour : jam video tersebut dipublish -
publish_when : range video tersebut dipublish -
publish_wday : hari video tersebut dipublish -
timetotrend : rentang waktu video tersebut dari dipublish
sampai menjadi trending (harian)
str(vids)
## 'data.frame': 72389 obs. of 16 variables:
## $ trending_date : Factor w/ 362 levels "2023-01-01","2023-01-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ title : Factor w/ 12595 levels "'1위/4K' 지민 (Jimin) - Like Crazy #엠카운트다운 EP.790 | Mnet 230330 방송",..: 8255 5659 3251 2843 11085 7742 4050 2291 9601 6102 ...
## $ channel_title : Factor w/ 3548 levels " 3 Southern Cats and Momma™ ",..: 957 2116 957 871 2215 1199 255 1082 3291 3459 ...
## $ category_id : Factor w/ 15 levels "Autos and Vehicles",..: 14 6 14 14 9 9 4 9 11 14 ...
## $ publish_time : Factor w/ 12059 levels "2022-12-18 19:20:24",..: 218 204 213 197 219 206 212 194 183 191 ...
## $ views : int 618494 3621512 500700 2811640 358177 2088464 1915097 663532 1454615 2706246 ...
## $ likes : int 7884 238494 4915 148481 2963 18234 24181 5130 32079 69895 ...
## $ dislikes : int 0 0 0 0 0 0 0 0 0 0 ...
## $ comment_count : int 2413 12935 2355 4543 654 1508 2711 776 3410 4231 ...
## $ comments_disabled : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ ratings_disabled : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ video_error_or_removed: logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ publish_hour : int 5 20 1 14 5 20 0 13 20 4 ...
## $ publish_when : Factor w/ 3 levels "12am to 7am",..: 1 2 1 3 1 2 1 3 2 1 ...
## $ publish_wday : Factor w/ 7 levels "Friday","Monday",..: 4 3 4 3 4 3 4 3 1 3 ...
## $ timetotrend : int 1 2 1 2 1 2 1 2 3 2 ...
unique(vids$category_id)
## [1] Sports Gaming News and Politics
## [4] Entertainment People and Blogs Comedy
## [7] Music Education Film and Animation
## [10] Science and Technology Howto and Style Travel and Events
## [13] Autos and Vehicles Pets and Animals Nonprofit and Activism
## 15 Levels: Autos and Vehicles Comedy Education ... Travel and Events
##Membuat analisa terkait karakteristik video dari kategori ‘Travel and Events’ yang memiliki video trending tidak lebih dari 1 minggu setelah rilis (df vids_Travel)
vids_Travel <-
vids %>%
filter(category_id == "Travel and Events", timetotrend <= 7)
head(vids_Travel)
## trending_date title
## 1 2023-01-01 We Raced To Visit The Most US States In 100 Hrs - Day 3
## 2 2023-01-02 We Raced To Visit The Most US States In 100 Hrs - Day 3
## 3 2023-01-03 We Raced To Visit The Most US States In 100 Hrs - Day 3
## 4 2023-01-08 Day in the Life of a Japanese Game Designer
## 5 2023-01-09 They Said It Would Be Worth the Risk
## 6 2023-01-09 Day in the Life of a Japanese Game Designer
## channel_title category_id publish_time views likes dislikes
## 1 Jet Lag: The Game Travel and Events 2022-12-28 15:31:33 561164 20361 0
## 2 Jet Lag: The Game Travel and Events 2022-12-28 15:31:33 577256 20816 0
## 3 Jet Lag: The Game Travel and Events 2022-12-28 15:31:33 590802 21162 0
## 4 Paolo fromTOKYO Travel and Events 2023-01-06 23:59:08 389259 23481 0
## 5 Eva zu Beck Travel and Events 2023-01-07 19:54:37 246397 14160 0
## 6 Paolo fromTOKYO Travel and Events 2023-01-06 23:59:08 506513 28309 0
## comment_count comments_disabled ratings_disabled video_error_or_removed
## 1 1570 FALSE FALSE FALSE
## 2 1603 FALSE FALSE FALSE
## 3 1621 FALSE FALSE FALSE
## 4 1402 FALSE FALSE FALSE
## 5 993 FALSE FALSE FALSE
## 6 1590 FALSE FALSE FALSE
## publish_hour publish_when publish_wday timetotrend
## 1 15 8am to 3pm Wednesday 5
## 2 15 8am to 3pm Wednesday 6
## 3 15 8am to 3pm Wednesday 7
## 4 23 4pm to 12am Friday 3
## 5 19 4pm to 12am Saturday 3
## 6 23 4pm to 12am Friday 4
###Mencari Hubungan Comment Ratio dan Like Ratio dari df vids_Travel Hasilnya hubungan Comment Ratio dan Like Ratio dari df vids_Travel berbanding lurus
vids_Travel_clean <- vids_Travel %>%
# deselect kolom yang tidak dibutuhkan
select(-comments_disabled, -ratings_disabled, -video_error_or_removed) %>%
# buat kolom baru
mutate(likes_ratio = likes / views,
comment_ratio = comment_count / views)
head(vids_Travel_clean)
## trending_date title
## 1 2023-01-01 We Raced To Visit The Most US States In 100 Hrs - Day 3
## 2 2023-01-02 We Raced To Visit The Most US States In 100 Hrs - Day 3
## 3 2023-01-03 We Raced To Visit The Most US States In 100 Hrs - Day 3
## 4 2023-01-08 Day in the Life of a Japanese Game Designer
## 5 2023-01-09 They Said It Would Be Worth the Risk
## 6 2023-01-09 Day in the Life of a Japanese Game Designer
## channel_title category_id publish_time views likes dislikes
## 1 Jet Lag: The Game Travel and Events 2022-12-28 15:31:33 561164 20361 0
## 2 Jet Lag: The Game Travel and Events 2022-12-28 15:31:33 577256 20816 0
## 3 Jet Lag: The Game Travel and Events 2022-12-28 15:31:33 590802 21162 0
## 4 Paolo fromTOKYO Travel and Events 2023-01-06 23:59:08 389259 23481 0
## 5 Eva zu Beck Travel and Events 2023-01-07 19:54:37 246397 14160 0
## 6 Paolo fromTOKYO Travel and Events 2023-01-06 23:59:08 506513 28309 0
## comment_count publish_hour publish_when publish_wday timetotrend likes_ratio
## 1 1570 15 8am to 3pm Wednesday 5 0.03628351
## 2 1603 15 8am to 3pm Wednesday 6 0.03606026
## 3 1621 15 8am to 3pm Wednesday 7 0.03581911
## 4 1402 23 4pm to 12am Friday 3 0.06032230
## 5 993 19 4pm to 12am Saturday 3 0.05746823
## 6 1590 23 4pm to 12am Friday 4 0.05588998
## comment_ratio
## 1 0.002797756
## 2 0.002776931
## 3 0.002743728
## 4 0.003601715
## 5 0.004030082
## 6 0.003139110
ggplot(data = vids_Travel_clean, aes(x = likes_ratio, y = comment_ratio, color = factor(category_id))) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Relationship Between Like Ratio & Comment Ratio by Category",
x = "Like Ratio",
y = "Comment Ratio",
color = "Category ID") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
###Mencari top 10 dari Video Travel dan Event (df vids_Travel) yang memiliki rata-rata likes terbanyak
vids_Travel_Like <-
vids_Travel_clean %>%
group_by(title) %>%
summarise(mean_likes = mean(likes, na.rm = TRUE)) %>%
arrange(desc(mean_likes))
head(vids_Travel_Like)
## # A tibble: 6 × 2
## title mean_likes
## <fct> <dbl>
## 1 I Was Trafficked Through The World's Deadliest Jungle 103214.
## 2 I Crossed The World's Deadliest Jungle: Darien Gap 85467
## 3 We're Having a Baby! 68800
## 4 Our Unexpected Move to Toronto 45774
## 5 $10,000 Basement Renovation (+ Bec's health update) 42671.
## 6 Not The Video We Wanted to Make 41895.
#Mencari top 10 video Travel & Event (berdasarkan rata-rata like)
Vids_Travel_Like_top10 <-
vids_Travel_Like %>%
arrange(desc(mean_likes)) %>%
head(10)
Vids_Travel_Like_top10
## # A tibble: 10 × 2
## title mean_likes
## <fct> <dbl>
## 1 I Was Trafficked Through The World's Deadliest Jungle 103214.
## 2 I Crossed The World's Deadliest Jungle: Darien Gap 85467
## 3 We're Having a Baby! 68800
## 4 Our Unexpected Move to Toronto 45774
## 5 $10,000 Basement Renovation (+ Bec's health update) 42671.
## 6 Not The Video We Wanted to Make 41895.
## 7 Raw, unfiltered BOAT LIFE in Thailand! (Extended Cut) 39122.
## 8 Day in the Life of a Japanese Bread Baker 38268.
## 9 Why Sudan is on the Verge of Civil War 37192.
## 10 Gender Reveal Fail! 36946.
library(ggplot2)
library(dplyr)
ggplot(Vids_Travel_Like_top10, aes(x = reorder(title, mean_likes), y = mean_likes)) +
geom_bar(stat = "identity", fill = "pink", color = "black") +
labs(title = "Average Likes per Video",
x = "Video Title",
y = "Average Likes") +
coord_flip()
theme(axis.text.x = element_text(angle = 90, hjust = 1))
## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : num 90
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
###Mencari Top 10 Channel yang memiliki jumlah video dengan trending
vids_Travel_count <- vids_Travel %>%
group_by(channel_title) %>%
summarise(jumlah_video = n() ) %>%
arrange(-jumlah_video) %>%
slice_head(n = 10) %>%
ungroup()
vids_Travel_count
## # A tibble: 10 × 2
## channel_title jumlah_video
## <fct> <int>
## 1 Eamon & Bec 138
## 2 Sailing La Vagabonde 76
## 3 Trek Trendy 55
## 4 Kara and Nate 51
## 5 Jet Lag: The Game 45
## 6 The Ninja Fam! 18
## 7 Gone with the Wynns 17
## 8 Paolo fromTOKYO 15
## 9 Dunkin' 14
## 10 Eva zu Beck 11
ggplot(vids_Travel_count, aes(x = jumlah_video ,
y = reorder(channel_title, jumlah_video) )) +
# bar plot
geom_col(aes(fill = jumlah_video)) +
# additional
scale_fill_gradient2(low="gray", mid= "pink", high="magenta", midpoint = median(vids_Travel_count$jumlah_video)) +
labs(title = "Top 10 Trending Travel and Event Channel of YouTube US 2023",
x = "Video Count",
y = NULL) +
scale_x_continuous(labels = comma) +
theme_minimal() +
theme(legend.position = "none")
###Mencari hari published video dengan rata-rata viewers terbanyak dari Top Trending Channel Travel & Event
# data
vids_Travel %>%
# Mengambil channel_title dengan Trending tertinggi (Eamon&Bec)
filter(channel_title == "Eamon & Bec") %>%
# Menghitung rata-rata views tiap jam publish
group_by(publish_wday) %>%
summarise(mean_viewers = mean(views)) %>%
ungroup() %>%
# Membuat tooltip
mutate(label = glue("Publish Day: {publish_wday}
Average Views: {comma(mean_viewers)}"))
## # A tibble: 2 × 3
## publish_wday mean_viewers label
## <fct> <dbl> <glue>
## 1 Saturday 396141. Publish Day: Saturday
## Average Views: 396,141
## 2 Sunday 537834. Publish Day: Sunday
## Average Views: 537,834
# data
vids_Travel %>%
# Mengambil channel_title dengan Trending tertinggi (Eamon&Bec)
filter(channel_title == "Eamon & Bec") %>%
# Menghitung rata-rata views tiap jam publish
group_by(publish_wday) %>%
slice_max(order_by = views, n = 1) %>%
ungroup() %>%
# Membuat tooltip
mutate(label = glue("Publish Day: {publish_wday}
Video: {title}
Views: {comma(views)}"))
## # A tibble: 2 × 17
## trending_date title channel_title category_id publish_time views likes
## <fct> <fct> <fct> <fct> <fct> <int> <int>
## 1 2023-02-10 Tiny House … Eamon & Bec Travel and… 2023-02-04 … 4.75e5 18495
## 2 2023-05-06 Not The Vid… Eamon & Bec Travel and… 2023-04-30 … 1.11e6 47541
## # ℹ 10 more variables: dislikes <int>, comment_count <int>,
## # comments_disabled <lgl>, ratings_disabled <lgl>,
## # video_error_or_removed <lgl>, publish_hour <int>, publish_when <fct>,
## # publish_wday <fct>, timetotrend <int>, label <glue>
###Mencari Title dengan viewers tertinggi yang dipublished Eamon & Bec di hari Minggu
channel_name_Travel <- "Eamon & Bec"
vids_top_per_wday <- vids_Travel %>%
filter(channel_title == channel_name_Travel, publish_wday == "Sunday") %>%
group_by(title) %>%
slice_max(order_by = views, n = 1) %>%
ungroup() %>%
arrange(desc(views)) %>%
slice_head(n = 5) %>%
mutate(label = glue("{title}\n{comma(views)} views"))
ggplot(vids_top_per_wday, aes(x = views, y = reorder(title, views))) +
geom_segment(aes(x = 0, xend = views, y = title, yend = title), color = "gray", size = 1) +
geom_point(color = "pink", size = 6) +
geom_text(aes(label = comma(views)), hjust = -0.5, size = 4) +
scale_x_continuous(limits = c(0,1500000), labels = comma) +
labs(title = glue("Top 5 Most Viewed Videos Published on Sunday ({channel_name_Travel})"),
x = "Views",
y = "Video Title") +
theme_minimal() +
theme(plot.title = element_text(size = 10.5, face = "bold"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.