library(tidyverse)
library(lubridate)
library(viridis)
library(scales)
library(ggrepel)
# Load the raw trending-videos table, keeping an untouched copy of it
# (vids.ori) alongside the working copy (vids) that is modified below.
vids.ori <- read.csv(file = "USvideos.csv")
vids <- vids.ori
# Print the type and a short preview of every column.
str(vids)
## 'data.frame':    13400 obs. of  12 variables:
##  $ trending_date         : Factor w/ 67 levels "17.01.12","17.02.12",..: 14 14 14 14 14 14 14 14 14 14 ...
##  $ title                 : Factor w/ 2986 levels "'I have dad moves': Barack Obama discusses dancing on David Letterman's new Netflix show",..: 2802 2574 2081 1903 1231 89 2164 143 2482 2920 ...
##  $ channel_title         : Factor w/ 1408 levels "_¢_Á_\235","“÷\201\220µ_‘⬓_\220 Korean Englishman",..: 195 686 1046 472 902 559 1063 283 6 1358 ...
##  $ category_id           : int  22 24 23 24 24 28 24 28 1 25 ...
##  $ publish_time          : Factor w/ 2903 levels "2008-04-05T18:22:40.000Z",..: 302 271 255 275 253 307 240 258 281 279 ...
##  $ views                 : int  748374 2418783 3191434 343168 2095731 119180 2103417 817732 826059 256426 ...
##  $ likes                 : int  57527 97185 146033 10172 132235 9763 15993 23663 3543 12654 ...
##  $ dislikes              : int  2966 6146 5339 666 1989 511 2445 778 119 1363 ...
##  $ comment_count         : int  15954 12703 8181 2146 17518 1434 1970 3432 340 2368 ...
##  $ comments_disabled     : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ ratings_disabled      : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ video_error_or_removed: logi  FALSE FALSE FALSE FALSE FALSE FALSE ...

# Preprocessing ----

# Parse the trending date strings (year.day.month order) into Date values.
vids[["trending_date"]] <- ydm(vids[["trending_date"]])

# Store titles as plain character strings.
vids[["title"]] <- as.character(vids[["title"]])

# Translate the numeric category ids into readable category names.
#
# A named lookup vector is used instead of sapply(..., switch, ...):
# switch() returns NULL for an id that is not listed, which turns the
# sapply() result into a list and makes the factor conversion fail.
# With the lookup, an unlisted id simply becomes NA.
category_names <- c(
  "1"  = "Film and Animation",
  "2"  = "Autos and Vehicles",
  "10" = "Music",
  "15" = "Pets and Animals",
  "17" = "Sports",
  "19" = "Travel and Events",
  "20" = "Gaming",
  "22" = "People and Blogs",
  "23" = "Comedy",
  "24" = "Entertainment",
  "25" = "News and Politics",
  "26" = "Howto and Style",
  "27" = "Education",
  "28" = "Science and Technology",
  "29" = "Nonprofit and Activism",
  "43" = "Shows"
)

# Index by the id as a string so the match is made on the vector names;
# unname() keeps the id strings from being carried along as element names.
vids$category_id <- as.factor(
  unname(category_names[as.character(vids$category_id)])
)
# Parse the ISO 8601 publish timestamps and express them in US Eastern time.
# lubridate reports while doing so:
## Date in ISO8601 format; converting timezone from UTC to "America/New_York".
vids[["publish_time"]] <- ymd_hms(vids[["publish_time"]], tz = "America/New_York")
# Hour of day (in that time zone) at which each video was published.
vids[["publish_hour"]] <- hour(vids[["publish_time"]])

# Map an hour of the day to a time-of-day label.
#
# x: numeric hour(s); may be a single value or a whole vector.
# Returns a character vector of the same length as x:
#   x < 8        -> "12am to 8am"
#   8 <= x < 16  -> "8am to 3pm"
#   x >= 16      -> "3pm to 12am"
# NA hours yield NA and empty input yields character(0).
#
# Note: the middle bucket covers hours 8 through 15, i.e. it runs until just
# before 4pm although its label says 3pm. The labels are left unchanged
# because they end up as factor levels.
pw <- function(x) {
  labels <- c("12am to 8am", "8am to 3pm", "3pm to 12am")
  # findInterval() gives 0, 1 or 2 for the three ranges split at 8 and 16.
  labels[findInterval(x, c(8, 16)) + 1L]
}

# Bucket every publish hour into its time-of-day label. vapply() guarantees
# a character result even when the table has no rows.
vids$publish_when <- as.factor(vapply(vids$publish_hour, pw, character(1)))

# Weekday of publication as an ordered factor running Monday to Sunday.
# The English day names are looked up from the numeric weekday (0 = Sunday)
# instead of coming from weekdays(), whose output follows the session locale
# and would not match the English levels on a non-English system.
vids$publish_wday <- ordered(
  c("Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday")[as.POSIXlt(vids$publish_time)$wday + 1L],
  levels = c("Monday",
             "Tuesday",
             "Wednesday",
             "Thursday",
             "Friday",
             "Saturday",
             "Sunday")
)
# Convert the engagement counters to numeric (double) columns.
count_cols <- c("views", "likes", "dislikes", "comment_count")
vids[count_cols] <- lapply(vids[count_cols], as.numeric)

# Keep only the first row seen for each title.
vids.u <- vids[!duplicated(vids$title), ]

# Whole days between the publish date and the trending date.
# NOTE(review): as.Date() on a date-time may not use the America/New_York
# zone that publish_hour and publish_wday are based on -- confirm which
# calendar day is intended here.
vids.u$timetotrend <- as.numeric(
  vids.u$trending_date - as.Date(vids.u$publish_time),
  units = "days"
)

# Collapse everything above one week into a single "8+" level.
vids.u$timetotrend <- as.factor(
  ifelse(vids.u$timetotrend <= 7, vids.u$timetotrend, "8+")
)

# Renumber rows 1..n. Assigning NULL also works for an empty table, where
# 1:nrow(vids.u) would produce c(1, 0) and make the assignment fail.
rownames(vids.u) <- NULL

# Plotting ----

# Dark plot theme: charcoal backgrounds, white text, dashed "grey2" grid
# lines, grey axis ticks and the legend placed above the plot.
# The repeated elements are built once inside local() so that no helper
# names are left behind in the workspace.
temaku <- local({
  dark_fill <- element_rect(fill = "#2b2d31")
  white_text <- element_text(colour = "white")
  dashed_grid <- element_line(colour = "grey2", linetype = 2)
  grey_tick <- element_line(colour = "grey")

  theme(
    panel.background = dark_fill,
    panel.grid.minor = element_blank(),
    panel.grid.major.x = dashed_grid,
    panel.grid.major.y = dashed_grid,
    panel.grid = dashed_grid,
    legend.key = dark_fill,
    legend.background = dark_fill,
    legend.title = white_text,
    legend.text = white_text,
    legend.position = "top",
    plot.subtitle = white_text,
    plot.title = white_text,
    plot.caption = white_text,
    plot.background = dark_fill,
    axis.title = white_text,
    axis.ticks.x = grey_tick,
    axis.ticks.y = grey_tick,
    axis.text.x = white_text,
    axis.text.y = white_text
  )
})

# Interactive Plot ----

# plotly supplies ggplotly(), used here to render a ggplot interactively.
library(plotly)
# NOTE(review): plot1 is not defined in the visible part of this script;
# presumably it is a ggplot object built elsewhere -- confirm.
ggplotly(p = plot1)

# Bonus examples ----

# From the examples in the plotly package's ggplotly() documentation:

# Scatterplot matrix of the four iris measurements, coloured by species,
# converted to an interactive plot whose highlighting is driven by
# "plotly_selected" events. The steps are named inside local() so that no
# intermediate objects remain in the workspace.
local({
  keyed_iris <- highlight_key(iris)
  pairs_plot <- GGally::ggpairs(keyed_iris, aes(colour = Species), columns = 1:4)
  pairs_widget <- ggplotly(pairs_plot, tooltip = c("x", "y", "colour"))
  highlight(pairs_widget, "plotly_selected")
})