# Print the current working directory; relative paths used in this script
# (such as "flenn.csv") are resolved against it.
getwd()

## [1] "C:/Users/Mr T/Desktop/test R/Flenn Youtube/flenn"

WHO IS FLENN:

Mahdi Kouloughli, a.k.a. Flenn, a.k.a. Wlid Bourouba, is an Algerian rapper.

Why Him And Not Another One ?

First, I chose Flenn to celebrate him being the most-streamed Algerian artist on Spotify, and also to celebrate the one-year anniversary of his first album, Flou.

Why YouTube and not Spotify?

The main reason for choosing YouTube over Spotify is that Spotify doesn’t show stream counts to the public,
whereas the data on YouTube is open, and you can use an API to retrieve it.

What Do We Expect to See ?

In this project I’ll do a full analysis of the YouTube channel and of the three projects he has released:

Album flou
Mixtape Mouja
The famous One Shot series

Phase One

Get the data from YouTube

channel id

# YouTube channel ID for Flenn's channel.
flenn_id <- "UCpofNN0ZqT57J1gqr83yeRw"

get the data from youtube :

X	title	publication_date	viewCount	commentCount	likeCount	year	year_month
1	Flenn - Spam [ Clip Officiel ]	2022-06-20	4343834	10149	169423	2022	2022 - 06 (juin)
2	Flenn - One Shot #2	2015-10-31	807829	633	13179	2015	2015 - 10 (oct.)
3	Flenn - La Brune [ Clip Officiel ]	2021-11-11	4957727	9230	178846	2021	2021 - 11 (nov.)
4	Flenn - Chemsi Ghabet [ Clip Officiel ]	2016-04-21	12996968	16240	233950	2016	2016 - 04 (avr.)
5	Flenn - Tab Tab	2022-11-03	2906279	3733	66462	2022	2022 - 11 (nov.)
6	Flenn - Liyam [ Bande Originale ]	2021-04-05	78133094	29051	762337	2021	2021 - 04 (avr.)

# Render a data frame as an HTML table.
#
# dataframe: the data frame to display.
# Returns the value of knitr::kable(dataframe, format = "html").
print_dataframe <- function(dataframe) {
  html_table <- knitr::kable(dataframe, format = "html")
  html_table
}

Phase Two

Cleaning The Data

The first problem that occurs is that the variables are not in the right format. * Change the type of publication_date from character to date

# Convert publication_date to Date values with lubridate.
flenn_df <- flenn_df %>%
  mutate(publication_date = lubridate::as_date(publication_date))

We change the counts to numeric values:

# Coerce the three counters to numeric in a single mutate() call, then
# preview the first rows of the result.
flenn_df <- flenn_df %>%
  mutate(
    viewCount = as.numeric(viewCount),
    commentCount = as.numeric(commentCount),
    likeCount = as.numeric(likeCount)
  )
print_dataframe(head(flenn_df))

X	title	publication_date	viewCount	commentCount	likeCount	year	year_month
1	Flenn - Spam [ Clip Officiel ]	2022-06-20	4343834	10149	169423	2022	2022 - 06 (juin)
2	Flenn - One Shot #2	2015-10-31	807829	633	13179	2015	2015 - 10 (oct.)
3	Flenn - La Brune [ Clip Officiel ]	2021-11-11	4957727	9230	178846	2021	2021 - 11 (nov.)
4	Flenn - Chemsi Ghabet [ Clip Officiel ]	2016-04-21	12996968	16240	233950	2016	2016 - 04 (avr.)
5	Flenn - Tab Tab	2022-11-03	2906279	3733	66462	2022	2022 - 11 (nov.)
6	Flenn - Liyam [ Bande Originale ]	2021-04-05	78133094	29051	762337	2021	2021 - 04 (avr.)

# List the column names of the cleaned data frame.
names(flenn_df)

## [1] "X"                "title"            "publication_date" "viewCount"       
## [5] "commentCount"     "likeCount"        "year"             "year_month"

We Keep only the columns we Need :

# Keep only the columns used in the analysis; the result is stored as `flenn`.
flenn <- flenn_df %>%
  select(title, publication_date, viewCount, commentCount, likeCount)

PHASE THREE

Analyse the data

Summarize the data

# Show the structure of `flenn`: number of rows and each column's type.
str(flenn)

## 'data.frame':    75 obs. of  5 variables:
##  $ title           : chr  "Flenn - Spam [ Clip Officiel ]" "Flenn - One Shot #2" "Flenn - La Brune [ Clip Officiel ]" "Flenn - Chemsi Ghabet [ Clip Officiel ]" ...
##  $ publication_date: Date, format: "2022-06-20" "2015-10-31" ...
##  $ viewCount       : num  4343834 807829 4957727 12996968 2906279 ...
##  $ commentCount    : num  10149 633 9230 16240 3733 ...
##  $ likeCount       : num  169423 13179 178846 233950 66462 ...

# Per-column summary statistics (min, quartiles, mean, max) for `flenn`.
summary(flenn)

##     title           publication_date       viewCount         commentCount  
##  Length:75          Min.   :2013-04-24   Min.   :       0   Min.   :    0  
##  Class :character   1st Qu.:2017-05-02   1st Qu.: 1404834   1st Qu.: 1952  
##  Mode  :character   Median :2019-08-13   Median : 3332623   Median : 4086  
##                     Mean   :2019-06-03   Mean   : 6506938   Mean   : 6444  
##                     3rd Qu.:2021-10-06   3rd Qu.: 7427124   3rd Qu.: 9297  
##                     Max.   :2022-11-03   Max.   :78133094   Max.   :55721  
##    likeCount     
##  Min.   :     0  
##  1st Qu.: 34091  
##  Median : 80078  
##  Mean   :117349  
##  3rd Qu.:167796  
##  Max.   :762337

The first 3 videos in the channel

# Sort by publication date (oldest first) and display the three earliest videos.
oldest_first <- arrange(flenn, publication_date)
print_dataframe(head(oldest_first, 3))

title	publication_date	viewCount	commentCount	likeCount
Flenn - Freestyle [ Clip Officiel ]	2013-04-24	1879313	2978	53699
Flenn - L’Menssi [ Clip Officiel ]	2013-11-29	1102266	1779	23690
Flenn - 7agg’Art [ Clip Officiel ]	2014-06-01	1244773	2176	26643

The last 3 videos in the channel

# Sort by publication date (newest first) and display the three latest videos.
newest_first <- arrange(flenn, desc(publication_date))
print_dataframe(head(newest_first, 3))

title	publication_date	viewCount	commentCount	likeCount
Flenn - Tab Tab	2022-11-03	2906279	3733	66462
Flenn - Business	2022-11-03	1334225	3897	49441
Flenn - Bondia	2022-11-03	2685802	5149	75586

The most viewed videos

# Sort by view count (highest first) and display the five most-viewed videos.
ranked_by_views <- arrange(flenn, desc(viewCount))
print_dataframe(head(ranked_by_views, 5))

title	publication_date	viewCount	commentCount	likeCount
Flenn - Liyam [ Bande Originale ]	2021-04-05	78133094	29051	762337
Flenn - Recyclage	2021-10-17	27445257	55721	545860
Flenn - Calme [ Clip Officiel ]	2021-03-17	23342990	12985	286811
Flenn - Ça dépend Ft. Syc	2021-09-30	22806257	12437	278444
Flenn - Meryoula [ Clip Officiel ]	2020-02-14	22146547	7339	275501

Add a year-month column

# Add a "YYYY - MM (mon)" label for each video's publication month.
# publication_date is referenced through mutate()'s data mask instead of
# `flenn$publication_date`, so the expression always uses the rows that are
# flowing through the pipe. A single format() call builds the same label the
# nested paste()/strftime() calls produced, and a missing date now stays NA
# instead of becoming the literal text "NA - NA (NA)".
# NOTE: "%b" yields the abbreviated month name in the current locale.
flenn <- flenn %>%
  mutate(year_month = format(publication_date, "%Y - %m (%b)"))

# Create a year column from the publication date.
flenn <- flenn %>%
  mutate(year = year(publication_date))

# Dot plot of the five most-viewed videos; both axes are expanded to include 0
# and the title labels are rotated to stay readable.
top_five_by_views <- head(arrange(flenn, desc(viewCount)), 5)
ggplot(top_five_by_views, aes(x = title, y = viewCount)) +
  geom_point() +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

group the data per year

# Total views per publication year, sorted from the largest total down.
data_grouped_by_year <- flenn %>%
  group_by(year) %>%
  summarise(views = sum(viewCount)) %>%
  arrange(desc(views))

year	views
2021	246829018
2020	98657451
2019	31581055
2022	26513238
2017	26323518
2016	25764213
2018	22916530
2015	5208938
2013	2981579
2014	1244773

# Yearly total views with a fitted linear trend line.
# A bare geom_abline() draws the fixed line y = x (slope 1, intercept 0),
# which has no relation to the data; a least-squares trend replaces it.
ggplot(data_grouped_by_year, aes(x = year, y = views)) +
  geom_point(color = "brown") +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "brown") +
  scale_y_continuous(labels = scales::comma) +
  theme(
    axis.title.x = element_text(colour = "brown"),
    axis.title.y = element_text(colour = "brown"),
    axis.line.x.bottom = element_line(color = "brown")
  )

Grouped by month

# Dot plot of the ten year-month buckets with the highest total views.
top_months_by_views <- flenn %>%
  group_by(year_month) %>%
  summarise(views = sum(viewCount)) %>%
  arrange(desc(views)) %>%
  head(10)

ggplot(top_months_by_views, aes(x = year_month, y = views)) +
  geom_point(color = "brown") +
  scale_y_continuous(labels = scales::comma) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_text(colour = "brown"),
    axis.title.y = element_text(colour = "brown"),
    axis.line.x.bottom = element_line(color = "brown")
  )

Separate the videos into two groups: clips and non-clips

# Title fragments used to flag a video as a clip.
strings <- c("clip", "Clip", "CLIP", "Bande", "Ma Cabine", "Houma", "Mafikch Niya")

# Flag each video whose title contains any of the fragments above.
# `title` is resolved through mutate()'s data mask rather than `flenn$title`,
# so the flag is always computed from the rows flowing through the pipe.
flenn_df4 <- flenn %>%
  mutate(clip = str_detect(title, paste(strings, collapse = "|")))

# Channel-wide view total, used as the denominator for the percentages below.
Total_views <- sum(flenn$viewCount)

# Per group (clip / not clip): total views, number of videos, and share of
# all views as a percentage rounded to two decimals.
clip <- flenn_df4 %>%
  group_by(clip) %>%
  summarise(
    views = sum(viewCount),
    number = n(),
    pourcentage_of_views = round(views / Total_views * 100, 2)
  )

# Save the data frame as a CSV file in the working directory.
# row.names = FALSE stops write.csv() from adding an unnamed index column,
# which read.csv() would bring back as a spurious "X" column.
write.csv(flenn, "flenn.csv", row.names = FALSE)
getwd()

## [1] "C:/Users/Mr T/Desktop/test R/Flenn Youtube/flenn"

# One colour per bar; the palette size must match the 10 rows kept below.
custom_colors <- viridis::mako(n = 10)

# Interactive column chart of the ten most-viewed videos.
# The y value is viewCount, so the tooltip is labelled "Views" (the previous
# label said "Reviews").
flenn %>%
  arrange(desc(viewCount)) %>%
  head(10) %>%
  hchart("column", hcaes(x = title, y = viewCount, color = custom_colors)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = "<b>Number of Views: </b> {point.y} <br>") %>%
  hc_title(
    text = "Most Popular Videos",
    style = list(fontSize = "25px", fontWeight = "bold")
  ) %>%
  hc_subtitle(
    text = "By Number of Views",
    style = list(fontSize = "16px")
  ) %>%
  hc_credits(enabled = TRUE, text = "@Djouah")

Flenn YouTube Channel

WHO IS FLENN:

Why Him And Not Another One ?

Why YouTube and not Spotify?

What Do We Expect to See ?

Phase One

Get the data from YouTube

get the data from youtube :

Phase Two

Cleaning The Data

We Keep only the columns we Need :

PHASE THREE

Analyse the data

Add a year-month column

group the data per year

Grouped by month

Separate the videos into two groups: clips and non-clips

Flenn YouTube Channel

WHO IS FLENN:

Why Him And Not Another One ?

Why YouTube and not Spotify?

What Do We Expect to See ?

Phase One

Get the data from YouTube

get the data from youtube :

Phase Two

Cleaning The Data

We Keep only the columns we Need :

**PHASE THREE**

Analyse the data

Add a year-month column

group the data per year

Grouped by month

Separate the videos into two groups: clips and non-clips

PHASE THREE