library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggrepel)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
theme_algoritma <- readRDS("theme_algoritma.rds")
youtube <- read.csv("data/youtubetrends.csv")

youtube <- youtube %>% 
mutate_if(is.integer, as.numeric) %>% 
  mutate(
    trending_date = ymd(trending_date),
    publish_hour = hour(publish_time),
    category_id = as.factor(category_id),
    likesratio = likes / views
  )
head(youtube)
##   trending_date                                                          title
## 1    2017-11-14                             WE WANT TO TALK ABOUT OUR MARRIAGE
## 2    2017-11-14 The Trump Presidency: Last Week Tonight with John Oliver (HBO)
## 3    2017-11-14          Racist Superman | Rudy Mancuso, King Bach & Lele Pons
## 4    2017-11-14                               Nickelback Lyrics: Real or Fake?
## 5    2017-11-14                                       I Dare You: GOING BALD!?
## 6    2017-11-14                                          2 Weeks with iPhone X
##           channel_title            category_id        publish_time   views
## 1          CaseyNeistat       People and Blogs 2017-11-13 12:13:01  748374
## 2       LastWeekTonight          Entertainment 2017-11-13 02:30:00 2418783
## 3          Rudy Mancuso                 Comedy 2017-11-12 14:05:24 3191434
## 4 Good Mythical Morning          Entertainment 2017-11-13 06:00:04  343168
## 5              nigahiga          Entertainment 2017-11-12 13:01:41 2095731
## 6              iJustine Science and Technology 2017-11-13 14:07:23  119180
##    likes dislikes comment_count comments_disabled ratings_disabled
## 1  57527     2966         15954             FALSE            FALSE
## 2  97185     6146         12703             FALSE            FALSE
## 3 146033     5339          8181             FALSE            FALSE
## 4  10172      666          2146             FALSE            FALSE
## 5 132235     1989         17518             FALSE            FALSE
## 6   9763      511          1434             FALSE            FALSE
##   video_error_or_removed publish_hour publish_when publish_wday timetotrend
## 1                  FALSE           12   8am to 3pm       Monday           1
## 2                  FALSE            2  12am to 8am       Monday           1
## 3                  FALSE           14   8am to 3pm       Sunday           2
## 4                  FALSE            6  12am to 8am       Monday           1
## 5                  FALSE           13   8am to 3pm       Sunday           2
## 6                  FALSE           14   8am to 3pm       Monday           1
##   likesratio
## 1 0.07686932
## 2 0.04017930
## 3 0.04575780
## 4 0.02964146
## 5 0.06309732
## 6 0.08191811
youtube_likesratio <- youtube %>%
  filter(likes > 1000) %>% 
  group_by(category_id) %>% 
  summarise(
    likesratio = mean(likesratio)
    ) %>% 
  ungroup() %>% 
  mutate(
    category_id = reorder(category_id, likesratio)
  ) %>%
  pivot_longer(cols = -category_id) %>% 
  mutate(
    text = paste0(name,": ",round(value,2))
  )

youtube_likesratio
## # A tibble: 16 x 4
##    category_id            name        value text            
##    <fct>                  <chr>       <dbl> <chr>           
##  1 Autos and Vehicles     likesratio 0.0264 likesratio: 0.03
##  2 Comedy                 likesratio 0.0573 likesratio: 0.06
##  3 Education              likesratio 0.0488 likesratio: 0.05
##  4 Entertainment          likesratio 0.0397 likesratio: 0.04
##  5 Film and Animation     likesratio 0.0449 likesratio: 0.04
##  6 Gaming                 likesratio 0.0515 likesratio: 0.05
##  7 Howto and Style        likesratio 0.0564 likesratio: 0.06
##  8 Music                  likesratio 0.0792 likesratio: 0.08
##  9 News and Politics      likesratio 0.0235 likesratio: 0.02
## 10 Nonprofit and Activism likesratio 0.0379 likesratio: 0.04
## 11 People and Blogs       likesratio 0.0642 likesratio: 0.06
## 12 Pets and Animals       likesratio 0.0501 likesratio: 0.05
## 13 Science and Technology likesratio 0.0458 likesratio: 0.05
## 14 Shows                  likesratio 0.0322 likesratio: 0.03
## 15 Sports                 likesratio 0.0217 likesratio: 0.02
## 16 Travel and Events      likesratio 0.0338 likesratio: 0.03
plot_likesratio <- ggplot(youtube_likesratio, aes(x = value, y = category_id, text = text)) +
geom_col(aes(fill = name)) +
  labs(y = "", x = "", title = "Youtube Likes Ratio",
      subtitle = "Likes Ratio per Category ID") +
  theme(legend.position = "none") +
  scale_x_continuous(labels = unit_format(unit = "")) +
  theme_algoritma


ggplotly(plot_likesratio, tooltip = "text")

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.