# Print the current working directory of the R session.
getwd()
## [1] "C:/Users/Mr T/Desktop/test R/Flenn Youtube/flenn"
Who Is Flenn?
Mahdi Kouloughli, AKA Flenn, AKA Wlid Bourouba, is an Algerian rapper.
Why Him and Not Someone Else?
First, I chose Flenn to celebrate him being the most-streamed Algerian artist on Spotify, and also to celebrate the one-year anniversary of his first album, Flou.
Why YouTube and Not Spotify?
The main reason to choose YouTube over Spotify is that Spotify doesn't
show stream counts to the public,
whereas the data on YouTube is
open, and you can use an API to retrieve it.
What Do We Expect to See?
In this project I'll do a full analysis of the YouTube channel, and also of the 3 projects that he has released.
Channel ID
# Identifier string of the YouTube channel analysed in this report.
flenn_id <- "UCpofNN0ZqT57J1gqr83yeRw"
X | title | publication_date | viewCount | commentCount | likeCount | year | year_month |
---|---|---|---|---|---|---|---|
1 | Flenn - Spam [ Clip Officiel ] | 2022-06-20 | 4343834 | 10149 | 169423 | 2022 | 2022 - 06 (juin) |
2 | Flenn - One Shot #2 | 2015-10-31 | 807829 | 633 | 13179 | 2015 | 2015 - 10 (oct.) |
3 | Flenn - La Brune [ Clip Officiel ] | 2021-11-11 | 4957727 | 9230 | 178846 | 2021 | 2021 - 11 (nov.) |
4 | Flenn - Chemsi Ghabet [ Clip Officiel ] | 2016-04-21 | 12996968 | 16240 | 233950 | 2016 | 2016 - 04 (avr.) |
5 | Flenn - Tab Tab | 2022-11-03 | 2906279 | 3733 | 66462 | 2022 | 2022 - 11 (nov.) |
6 | Flenn - Liyam [ Bande Originale ] | 2021-04-05 | 78133094 | 29051 | 762337 | 2021 | 2021 - 04 (avr.) |
# Render a data frame as an HTML table.
#
# dataframe: the object handed to knitr::kable().
# Returns the value produced by knitr::kable(..., format = "html").
print_dataframe <- function(dataframe) {
  html_table <- knitr::kable(x = dataframe, format = "html")
  html_table
}
The first problem that occurs is that the variables are not in the right format:
* change the type of publication_date from character to date
# Convert publication_date with lubridate::as_date() and store the result
# back into flenn_df.
flenn_df <- mutate(
  flenn_df,
  publication_date = lubridate::as_date(publication_date)
)
We change the counts to numeric values:
# Coerce the three count columns to numeric in a single mutate() call.
flenn_df <- flenn_df %>%
  mutate(
    viewCount = as.numeric(viewCount),
    commentCount = as.numeric(commentCount),
    likeCount = as.numeric(likeCount)
  )

# Show the first rows of the converted data frame as an HTML table.
flenn_df %>%
  head() %>%
  print_dataframe()
X | title | publication_date | viewCount | commentCount | likeCount | year | year_month |
---|---|---|---|---|---|---|---|
1 | Flenn - Spam [ Clip Officiel ] | 2022-06-20 | 4343834 | 10149 | 169423 | 2022 | 2022 - 06 (juin) |
2 | Flenn - One Shot #2 | 2015-10-31 | 807829 | 633 | 13179 | 2015 | 2015 - 10 (oct.) |
3 | Flenn - La Brune [ Clip Officiel ] | 2021-11-11 | 4957727 | 9230 | 178846 | 2021 | 2021 - 11 (nov.) |
4 | Flenn - Chemsi Ghabet [ Clip Officiel ] | 2016-04-21 | 12996968 | 16240 | 233950 | 2016 | 2016 - 04 (avr.) |
5 | Flenn - Tab Tab | 2022-11-03 | 2906279 | 3733 | 66462 | 2022 | 2022 - 11 (nov.) |
6 | Flenn - Liyam [ Bande Originale ] | 2021-04-05 | 78133094 | 29051 | 762337 | 2021 | 2021 - 04 (avr.) |
# List the column names of flenn_df.
names(flenn_df)
## [1] "X" "title" "publication_date" "viewCount"
## [5] "commentCount" "likeCount" "year" "year_month"
# Keep only the title, publication date and the three count columns;
# the result is the working data frame `flenn`.
flenn <- select(
  flenn_df,
  title, publication_date, viewCount, commentCount, likeCount
)
Summarize the data
# Display the structure (column names, types, first values) of flenn.
str(flenn)
## 'data.frame': 75 obs. of 5 variables:
## $ title : chr "Flenn - Spam [ Clip Officiel ]" "Flenn - One Shot #2" "Flenn - La Brune [ Clip Officiel ]" "Flenn - Chemsi Ghabet [ Clip Officiel ]" ...
## $ publication_date: Date, format: "2022-06-20" "2015-10-31" ...
## $ viewCount : num 4343834 807829 4957727 12996968 2906279 ...
## $ commentCount : num 10149 633 9230 16240 3733 ...
## $ likeCount : num 169423 13179 178846 233950 66462 ...
# Print per-column summary statistics of flenn.
summary(flenn)
## title publication_date viewCount commentCount
## Length:75 Min. :2013-04-24 Min. : 0 Min. : 0
## Class :character 1st Qu.:2017-05-02 1st Qu.: 1404834 1st Qu.: 1952
## Mode :character Median :2019-08-13 Median : 3332623 Median : 4086
## Mean :2019-06-03 Mean : 6506938 Mean : 6444
## 3rd Qu.:2021-10-06 3rd Qu.: 7427124 3rd Qu.: 9297
## Max. :2022-11-03 Max. :78133094 Max. :55721
## likeCount
## Min. : 0
## 1st Qu.: 34091
## Median : 80078
## Mean :117349
## 3rd Qu.:167796
## Max. :762337
The first 3 videos in the channel
# Sort ascending by publication date and show the first three rows.
print_dataframe(
  head(arrange(flenn, publication_date), 3)
)
title | publication_date | viewCount | commentCount | likeCount |
---|---|---|---|---|
Flenn - Freestyle [ Clip Officiel ] | 2013-04-24 | 1879313 | 2978 | 53699 |
Flenn - L’Menssi [ Clip Officiel ] | 2013-11-29 | 1102266 | 1779 | 23690 |
Flenn - 7agg’Art [ Clip Officiel ] | 2014-06-01 | 1244773 | 2176 | 26643 |
The last 3 videos in the channel
# Sort descending by publication date and show the first three rows.
print_dataframe(
  head(arrange(flenn, desc(publication_date)), 3)
)
title | publication_date | viewCount | commentCount | likeCount |
---|---|---|---|---|
Flenn - Tab Tab | 2022-11-03 | 2906279 | 3733 | 66462 |
Flenn - Business | 2022-11-03 | 1334225 | 3897 | 49441 |
Flenn - Bondia | 2022-11-03 | 2685802 | 5149 | 75586 |
The most viewed videos
# Sort descending by view count and show the top five rows.
print_dataframe(
  head(arrange(flenn, desc(viewCount)), 5)
)
title | publication_date | viewCount | commentCount | likeCount |
---|---|---|---|---|
Flenn - Liyam [ Bande Originale ] | 2021-04-05 | 78133094 | 29051 | 762337 |
Flenn - Recyclage | 2021-10-17 | 27445257 | 55721 | 545860 |
Flenn - Calme [ Clip Officiel ] | 2021-03-17 | 23342990 | 12985 | 286811 |
Flenn - Ça dépend Ft. Syc | 2021-09-30 | 22806257 | 12437 | 278444 |
Flenn - Meryoula [ Clip Officiel ] | 2020-02-14 | 22146547 | 7339 | 275501 |
# Add a year_month label of the form "YYYY - MM (Mon)", e.g. "2022 - 06 (juin)".
# A single format() call on the column replaces three strftime() calls that
# reached back into the global `flenn` via `$`, which would misalign if the
# piped data were ever filtered or grouped first. It also leaves a missing
# date as NA instead of the literal string "NA - NA (NA)" that paste() builds.
# Note: %b is the locale's abbreviated month name, so the text in parentheses
# depends on the session locale.
flenn <- flenn %>%
  mutate(year_month = format(publication_date, "%Y - %m (%b)"))
# Create a year column from the publication date.
flenn <- mutate(flenn, year = year(publication_date))
# Dot plot of the five rows with the highest viewCount, one point per title.
ggplot(
  head(arrange(flenn, desc(viewCount)), 5),
  aes(x = title, y = viewCount)
) +
  geom_point() +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  # Print the y-axis values with thousands separators.
  scale_y_continuous(labels = scales::comma) +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Sum viewCount per year and order the years from most to least viewed.
data_grouped_by_year <- flenn %>%
  group_by(year) %>%
  summarise(views = sum(viewCount)) %>%
  arrange(desc(views))
year | views |
---|---|
2021 | 246829018 |
2020 | 98657451 |
2019 | 31581055 |
2022 | 26513238 |
2017 | 26323518 |
2016 | 25764213 |
2018 | 22916530 |
2015 | 5208938 |
2013 | 2981579 |
2014 | 1244773 |
# Total views per year, with a fitted linear trend line.
# The bare geom_abline() used previously draws the line y = x (intercept 0,
# slope 1). With views in the millions that line is indistinguishable from
# the x axis and says nothing about the data, so it is replaced by a
# least-squares fit of views on year.
# NOTE(review): a trend line is the presumed intent - confirm with the author.
ggplot(data_grouped_by_year, aes(x = year, y = views)) +
  geom_point(color = "brown") +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "brown") +
  scale_y_continuous(labels = scales::comma) +
  theme(
    axis.title.x = element_text(colour = "brown"),
    axis.title.y = element_text(colour = "brown"),
    axis.line.x.bottom = element_line(color = "brown")
  )
# Dot plot of the ten year_month groups with the highest summed viewCount.
flenn %>%
  group_by(year_month) %>%
  summarise(views = sum(viewCount)) %>%
  arrange(desc(views)) %>%
  head(10) %>%
  ggplot(aes(x = year_month, y = views)) +
  geom_point(color = "brown") +
  scale_y_continuous(labels = scales::comma) +
  # All theme tweaks in one call: rotated x labels, brown axis titles and
  # a brown bottom axis line.
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_text(colour = "brown"),
    axis.title.y = element_text(colour = "brown"),
    axis.line.x.bottom = element_line(color = "brown")
  )
# Title fragments whose presence sets the `clip` flag below.
strings <- c(
  "clip", "Clip", "CLIP", "Bande", "Ma Cabine", "Houma", "Mafikch Niya"
)

# Flag each video whose title matches any fragment (joined into one regex
# with "|").
flenn_df4 <- mutate(
  flenn,
  clip = str_detect(title, paste(strings, collapse = "|"))
)

# Views summed over every video; denominator for the percentage below.
Total_views <- sum(flenn$viewCount)

# Per flag value: summed views, number of videos, and share of all views
# as a percentage rounded to two decimals.
clip <- flenn_df4 %>%
  group_by(clip) %>%
  summarise(
    views = sum(viewCount),
    number = n(),
    pourcentage_of_views = round(views / Total_views * 100, 2)
  )
# Save the data frame as a CSV file in the working directory.
# row.names = FALSE: by default write.csv() also writes the row names as an
# unnamed first column, which comes back as a spurious "X" column when the
# file is read again with read.csv().
write.csv(flenn, "flenn.csv", row.names = FALSE)
# Print the current working directory (where flenn.csv was written).
getwd()
## [1] "C:/Users/Mr T/Desktop/test R/Flenn Youtube/flenn"
# One palette colour per plotted row. Sizing the palette from the data keeps
# its length equal to the number of rows head(10) returns, even when there
# are fewer than ten videos.
custom_colors <- viridis::mako(n = min(10, nrow(flenn)))

# Interactive column chart of the (up to) ten most-viewed videos.
flenn %>%
  arrange(desc(viewCount)) %>%
  head(10) %>%
  hchart(
    "column",
    hcaes(x = title, y = viewCount, color = custom_colors)
  ) %>%
  hc_add_theme(hc_theme_google()) %>%
  # The y value is viewCount, so the tooltip is labelled "Views"
  # (it previously read "Reviews").
  hc_tooltip(pointFormat = "<b>Number of Views: </b> {point.y} <br>") %>%
  hc_title(
    text = "Most Popular Videos",
    style = list(fontSize = "25px", fontWeight = "bold")
  ) %>%
  hc_subtitle(
    text = "By Number of Views",
    style = list(fontSize = "16px")
  ) %>%
  hc_credits(enabled = TRUE, text = "@Djouah")