# Report the current working directory.
getwd()
## [1] "C:/Users/Mr T/Desktop/test R/Flenn Youtube/flenn"

WHO IS FLENN:

Mahdi Kouloughli, AKA Flenn, AKA Wlid Bourouba, is an Algerian rapper.

Why him and not someone else?

First, I chose Flenn to celebrate him being the most-streamed Algerian artist on Spotify, and also to celebrate the one-year anniversary of his first album, Flou.

Why YouTube and not Spotify?

The main reason for choosing YouTube over Spotify is that Spotify doesn't show stream counts to the public,
whereas the data on YouTube is open, and you can use an API to retrieve it.

What Do We Expect to See ?

In this project I'll do a full analysis of the YouTube channel, and also of the 3 projects that he has.

Phase One

Get the data from YouTube

channel id

# Channel ID of the YouTube channel being analysed.
flenn_id <- "UCpofNN0ZqT57J1gqr83yeRw"

get the data from youtube :

X title publication_date viewCount commentCount likeCount year year_month
1 Flenn - Spam [ Clip Officiel ] 2022-06-20 4343834 10149 169423 2022 2022 - 06 (juin)
2 Flenn - One Shot #2 2015-10-31 807829 633 13179 2015 2015 - 10 (oct.)
3 Flenn - La Brune [ Clip Officiel ] 2021-11-11 4957727 9230 178846 2021 2021 - 11 (nov.)
4 Flenn - Chemsi Ghabet [ Clip Officiel ] 2016-04-21 12996968 16240 233950 2016 2016 - 04 (avr.)
5 Flenn - Tab Tab 2022-11-03 2906279 3733 66462 2022 2022 - 11 (nov.)
6 Flenn - Liyam [ Bande Originale ] 2021-04-05 78133094 29051 762337 2021 2021 - 04 (avr.)
# Render a data frame as an HTML table.
#
# dataframe: the data frame to render.
# Returns the value produced by knitr::kable(dataframe, format = "html").
print_dataframe <- function(dataframe) {
  html_table <- knitr::kable(x = dataframe, format = "html")
  html_table
}

Phase Two

Cleaning The Data

The first problem that occurs is that the variables are not in the right format:

* Change the type of publication_date from character to date.

# Parse publication_date into a date column and store the result back in
# flenn_df.
flenn_df <- flenn_df %>%
  mutate(publication_date = lubridate::as_date(publication_date))

We change the counts to numeric values:

# Coerce the three counter columns to numeric in a single mutate() call,
# then show the first rows of the updated data frame.
flenn_df <- flenn_df %>%
  mutate(
    viewCount = as.numeric(viewCount),
    commentCount = as.numeric(commentCount),
    likeCount = as.numeric(likeCount)
  )
flenn_df %>%
  head() %>%
  print_dataframe()
X title publication_date viewCount commentCount likeCount year year_month
1 Flenn - Spam [ Clip Officiel ] 2022-06-20 4343834 10149 169423 2022 2022 - 06 (juin)
2 Flenn - One Shot #2 2015-10-31 807829 633 13179 2015 2015 - 10 (oct.)
3 Flenn - La Brune [ Clip Officiel ] 2021-11-11 4957727 9230 178846 2021 2021 - 11 (nov.)
4 Flenn - Chemsi Ghabet [ Clip Officiel ] 2016-04-21 12996968 16240 233950 2016 2016 - 04 (avr.)
5 Flenn - Tab Tab 2022-11-03 2906279 3733 66462 2022 2022 - 11 (nov.)
6 Flenn - Liyam [ Bande Originale ] 2021-04-05 78133094 29051 762337 2021 2021 - 04 (avr.)
# List the column names of flenn_df.
names(flenn_df)
## [1] "X"                "title"            "publication_date" "viewCount"       
## [5] "commentCount"     "likeCount"        "year"             "year_month"

We keep only the columns we need:

# Keep only the columns used in the analysis and store them as `flenn`.
flenn <- flenn_df %>%
  select(
    title,
    publication_date,
    viewCount,
    commentCount,
    likeCount
  )

**PHASE THREE**

Analyse the data

Summarize the data

# Show the structure of `flenn`: its dimensions and each column's type.
str(flenn)
## 'data.frame':    75 obs. of  5 variables:
##  $ title           : chr  "Flenn - Spam [ Clip Officiel ]" "Flenn - One Shot #2" "Flenn - La Brune [ Clip Officiel ]" "Flenn - Chemsi Ghabet [ Clip Officiel ]" ...
##  $ publication_date: Date, format: "2022-06-20" "2015-10-31" ...
##  $ viewCount       : num  4343834 807829 4957727 12996968 2906279 ...
##  $ commentCount    : num  10149 633 9230 16240 3733 ...
##  $ likeCount       : num  169423 13179 178846 233950 66462 ...
# Print per-column summary statistics for `flenn`.
summary(flenn)
##     title           publication_date       viewCount         commentCount  
##  Length:75          Min.   :2013-04-24   Min.   :       0   Min.   :    0  
##  Class :character   1st Qu.:2017-05-02   1st Qu.: 1404834   1st Qu.: 1952  
##  Mode  :character   Median :2019-08-13   Median : 3332623   Median : 4086  
##                     Mean   :2019-06-03   Mean   : 6506938   Mean   : 6444  
##                     3rd Qu.:2021-10-06   3rd Qu.: 7427124   3rd Qu.: 9297  
##                     Max.   :2022-11-03   Max.   :78133094   Max.   :55721  
##    likeCount     
##  Min.   :     0  
##  1st Qu.: 34091  
##  Median : 80078  
##  Mean   :117349  
##  3rd Qu.:167796  
##  Max.   :762337

The first 3 videos in the channel

# Oldest uploads: sort ascending by publication date, keep the first three
# rows, and render them as an HTML table.
print_dataframe(
  head(arrange(flenn, publication_date), n = 3)
)
title publication_date viewCount commentCount likeCount
Flenn - Freestyle [ Clip Officiel ] 2013-04-24 1879313 2978 53699
Flenn - L’Menssi [ Clip Officiel ] 2013-11-29 1102266 1779 23690
Flenn - 7agg’Art [ Clip Officiel ] 2014-06-01 1244773 2176 26643

The last 3 videos in the channel

# Newest uploads: sort descending by publication date, keep the first three
# rows, and render them as an HTML table.
print_dataframe(
  head(arrange(flenn, desc(publication_date)), n = 3)
)
title publication_date viewCount commentCount likeCount
Flenn - Tab Tab 2022-11-03 2906279 3733 66462
Flenn - Business 2022-11-03 1334225 3897 49441
Flenn - Bondia 2022-11-03 2685802 5149 75586

The most viewed videos

# Five most-viewed videos: sort descending by view count, keep the first five
# rows, and render them as an HTML table.
print_dataframe(
  head(arrange(flenn, desc(viewCount)), n = 5)
)
title publication_date viewCount commentCount likeCount
Flenn - Liyam [ Bande Originale ] 2021-04-05 78133094 29051 762337
Flenn - Recyclage 2021-10-17 27445257 55721 545860
Flenn - Calme [ Clip Officiel ] 2021-03-17 23342990 12985 286811
Flenn - Ça dépend Ft. Syc 2021-09-30 22806257 12437 278444
Flenn - Meryoula [ Clip Officiel ] 2020-02-14 22146547 7339 275501

Add a year-month column

# Add a "YYYY - MM (Mon)" label column derived from publication_date.
# The column is referenced directly (not via flenn$...) so the expression is
# evaluated against the data frame flowing through the pipe. One format string
# replaces the three strftime() calls glued together with paste(); "%b" is the
# abbreviated month name in the current locale. A missing date stays NA
# instead of turning into the text "NA - NA (NA)".
flenn <- flenn %>%
  mutate(year_month = format(publication_date, "%Y - %m (%b)"))

### Create a year column

# Derive each video's year from publication_date and store it in `year`.
flenn <- flenn %>%
  mutate(year = year(publication_date))
# Point plot of the five most-viewed videos (title on x, views on y).
# expand_limits() is asked to include 0 on both axes, the y labels use
# thousands separators, and the x labels are rotated 90 degrees.
flenn %>%
  arrange(desc(viewCount)) %>%
  head(n = 5) %>%
  ggplot(mapping = aes(x = title, y = viewCount)) +
  geom_point() +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_y_continuous(labels = scales::comma) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

Group the data by year

# Total views per year, ordered from the most-viewed year to the least.
data_grouped_by_year <- flenn %>%
  group_by(year) %>%
  summarise(views = sum(viewCount)) %>%
  arrange(desc(views))
year views
2021 246829018
2020 98657451
2019 31581055
2022 26513238
2017 26323518
2016 25764213
2018 22916530
2015 5208938
2013 2981579
2014 1244773
# Point plot of total views per year, with brown points, axis titles and
# bottom axis line.
# NOTE(review): geom_abline() is called with no arguments, so it draws the
# default line (intercept 0, slope 1) - confirm this is the intended reference.
ggplot(data = data_grouped_by_year, mapping = aes(x = year, y = views)) +
  geom_point(colour = "brown") +
  scale_y_continuous(labels = scales::comma) +
  geom_abline() +
  theme(
    axis.title.x = element_text(colour = "brown"),
    axis.title.y = element_text(colour = "brown"),
    axis.line.x.bottom = element_line(colour = "brown")
  )

Grouped by month

# Point plot of the ten year-month buckets with the highest total views.
# All theme tweaks (rotated x labels, brown axis titles, brown bottom axis
# line) are gathered in one theme() call.
flenn %>%
  group_by(year_month) %>%
  summarise(views = sum(viewCount)) %>%
  arrange(desc(views)) %>%
  head(n = 10) %>%
  ggplot(mapping = aes(x = year_month, y = views)) +
  geom_point(colour = "brown") +
  scale_y_continuous(labels = scales::comma) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_text(colour = "brown"),
    axis.title.y = element_text(colour = "brown"),
    axis.line.x.bottom = element_line(colour = "brown")
  )

Separate the videos into two groups: clips and non-clips

# Title fragments used to flag a video as a clip; they are joined into one
# regular expression with "|" (match any of them).
# NOTE(review): "Ma Cabine", "Houma" and "Mafikch Niya" are presumably clips
# whose titles lack the word "clip" - confirm against the channel.
strings <- c(
  "clip", "Clip", "CLIP", "Bande",
  "Ma Cabine", "Houma", "Mafikch Niya"
)

# Add a logical `clip` column. `title` is referenced as a column of the piped
# data frame rather than via flenn$title, so the test always runs on the rows
# actually being mutated.
flenn_df4 <- flenn %>%
  mutate(clip = str_detect(title, paste(strings, collapse = "|")))

# Total views over all videos, used as the denominator for the percentages.
Total_views <- sum(flenn$viewCount)

# Per group (clip / not clip): total views, number of videos, and share of
# all views in percent, rounded to two decimals.
clip <- flenn_df4 %>%
  group_by(clip) %>%
  summarise(
    views = sum(viewCount),
    number = n(),
    pourcentage_of_views = round(views / Total_views * 100, 2)
  )
# Save the data frame as a CSV file in the working directory.
# row.names = FALSE keeps the row index out of the file; otherwise reading the
# file back yields an extra, unnamed index column (presumably the origin of
# the "X" column seen in flenn_df - confirm).
write.csv(flenn, "flenn.csv", row.names = FALSE)

# Show the directory the file was written to.
getwd()
## [1] "C:/Users/Mr T/Desktop/test R/Flenn Youtube/flenn"
# Ten colours from the viridis "mako" palette, one per plotted column.
custom_colors <- viridis::mako(n = 10)

# Interactive column chart of the ten most-viewed videos.
# The tooltip label reads "Views" (it previously said "Reviews") because the
# plotted value is viewCount, matching the subtitle "By Number of Views".
flenn %>%
  arrange(desc(viewCount)) %>%
  head(10) %>%
  hchart("column", hcaes(x = title, y = viewCount, color = custom_colors)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = "<b>Number of Views: </b> {point.y} <br>") %>%
  hc_title(
    text = "Most Popular Videos",
    style = list(fontSize = "25px", fontWeight = "bold")
  ) %>%
  hc_subtitle(
    text = "By Number of Views",
    style = list(fontSize = "16px")
  ) %>%
  hc_credits(enabled = TRUE, text = "@Djouah")