library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggrepel)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
theme_algoritma <- readRDS("theme_algoritma.rds")
youtube <- read.csv("data/youtubetrends.csv")
youtube <- youtube %>%
mutate_if(is.integer, as.numeric) %>%
mutate(
trending_date = ymd(trending_date),
publish_hour = hour(publish_time),
category_id = as.factor(category_id),
likesratio = likes / views
)
head(youtube)
## trending_date title
## 1 2017-11-14 WE WANT TO TALK ABOUT OUR MARRIAGE
## 2 2017-11-14 The Trump Presidency: Last Week Tonight with John Oliver (HBO)
## 3 2017-11-14 Racist Superman | Rudy Mancuso, King Bach & Lele Pons
## 4 2017-11-14 Nickelback Lyrics: Real or Fake?
## 5 2017-11-14 I Dare You: GOING BALD!?
## 6 2017-11-14 2 Weeks with iPhone X
## channel_title category_id publish_time views
## 1 CaseyNeistat People and Blogs 2017-11-13 12:13:01 748374
## 2 LastWeekTonight Entertainment 2017-11-13 02:30:00 2418783
## 3 Rudy Mancuso Comedy 2017-11-12 14:05:24 3191434
## 4 Good Mythical Morning Entertainment 2017-11-13 06:00:04 343168
## 5 nigahiga Entertainment 2017-11-12 13:01:41 2095731
## 6 iJustine Science and Technology 2017-11-13 14:07:23 119180
## likes dislikes comment_count comments_disabled ratings_disabled
## 1 57527 2966 15954 FALSE FALSE
## 2 97185 6146 12703 FALSE FALSE
## 3 146033 5339 8181 FALSE FALSE
## 4 10172 666 2146 FALSE FALSE
## 5 132235 1989 17518 FALSE FALSE
## 6 9763 511 1434 FALSE FALSE
## video_error_or_removed publish_hour publish_when publish_wday timetotrend
## 1 FALSE 12 8am to 3pm Monday 1
## 2 FALSE 2 12am to 8am Monday 1
## 3 FALSE 14 8am to 3pm Sunday 2
## 4 FALSE 6 12am to 8am Monday 1
## 5 FALSE 13 8am to 3pm Sunday 2
## 6 FALSE 14 8am to 3pm Monday 1
## likesratio
## 1 0.07686932
## 2 0.04017930
## 3 0.04575780
## 4 0.02964146
## 5 0.06309732
## 6 0.08191811
youtube_likesratio <- youtube %>%
filter(likes > 1000) %>%
group_by(category_id) %>%
summarise(
likesratio = mean(likesratio)
) %>%
ungroup() %>%
mutate(
category_id = reorder(category_id, likesratio)
) %>%
pivot_longer(cols = -category_id) %>%
mutate(
text = paste0(name,": ",round(value,2))
)
youtube_likesratio
## # A tibble: 16 x 4
## category_id name value text
## <fct> <chr> <dbl> <chr>
## 1 Autos and Vehicles likesratio 0.0264 likesratio: 0.03
## 2 Comedy likesratio 0.0573 likesratio: 0.06
## 3 Education likesratio 0.0488 likesratio: 0.05
## 4 Entertainment likesratio 0.0397 likesratio: 0.04
## 5 Film and Animation likesratio 0.0449 likesratio: 0.04
## 6 Gaming likesratio 0.0515 likesratio: 0.05
## 7 Howto and Style likesratio 0.0564 likesratio: 0.06
## 8 Music likesratio 0.0792 likesratio: 0.08
## 9 News and Politics likesratio 0.0235 likesratio: 0.02
## 10 Nonprofit and Activism likesratio 0.0379 likesratio: 0.04
## 11 People and Blogs likesratio 0.0642 likesratio: 0.06
## 12 Pets and Animals likesratio 0.0501 likesratio: 0.05
## 13 Science and Technology likesratio 0.0458 likesratio: 0.05
## 14 Shows likesratio 0.0322 likesratio: 0.03
## 15 Sports likesratio 0.0217 likesratio: 0.02
## 16 Travel and Events likesratio 0.0338 likesratio: 0.03
plot_likesratio <- ggplot(youtube_likesratio, aes(x = value, y = category_id, text = text)) +
geom_col(aes(fill = name)) +
labs(y = "", x = "", title = "Youtube Likes Ratio",
subtitle = "Likes Ratio per Category ID") +
theme(legend.position = "none") +
scale_x_continuous(labels = unit_format(unit = "")) +
theme_algoritma
ggplotly(plot_likesratio, tooltip = "text")
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.