library(readxl)
library(tibble)
library(ggplot2)
library(tidyverse)
library(tm)
library(showtext)
library(lubridate)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(officer)
library(dplyr)
library(showtext)
library(tidyr)
library(knitr)
library(kableExtra)
library(cowplot)
library(colorspace)
library(ggrepel)
library(sf)
#library(tmap)    # for static and interactive maps
library(leaflet) # for interactive maps
#library(spData)
library(rnaturalearth)
library(leaflet.extras)
library(sp)
library(wbstats)
library(formattable)
library(rvest)
library(XML)
library(BBmisc)
library(xml2)
library(fmsb)
library(colormap)
library(circlize)
library(networkD3)
library(influential) #to create Sankey Diagram
library(igraph) #to create Sankey Diagram
library(oce) #to create Sankey Diagram
library(ggraph) #to create Sankey Diagram
library(devtools) #to add some external libraries
library(addTextLabels)
library(openxlsx)
library(data.tree) #to create a hierarchy
library(htmlwidgets) #to save interative graphs
library(circlepackeR) #to plot circles
library(geomtextpath)
#devtools::install_github("jeromefroe/circlepackeR") # If needed

# Register the Averta PE fonts with sysfonts/showtext.
#
# font_paths() takes directories to search, not individual font files, and the
# family names must match the ones requested by the ggplot themes in this file
# ("AvertaPE-Black" and "AvertaPE-Regular"). The original ".otf"-suffixed
# family names are also registered so that any code still using them keeps
# working.
# NOTE(review): registered fonts are only used for rendering when showtext is
# enabled (showtext_auto() or the knitr `fig.showtext` chunk option) - confirm
# that this is done elsewhere.
font_dir <- "/Users/theotimebourgeois/Library/Fonts"
font_paths(font_dir)
for (font_name in c("AvertaPE-Black", "AvertaPE-Regular")) {
  font_file <- file.path(font_dir, paste0(font_name, ".otf"))
  font_add(family = font_name, regular = font_file)
  font_add(family = paste0(font_name, ".otf"), regular = font_file)
}
rm(font_dir, font_name, font_file)

# Colour palette used throughout the report (hex codes).
# "purple" was previously "#2C2C54".
base_colours <- c(
  purple = "#00051E",
  pink   = "#A40E4C",
  blue   = "#2E86AB",
  yellow = "#FF9C00",
  lila   = "#E3DFFF",
  brown  = "#C3979F",
  grey   = "#BFBFBF",
  white  = "#FFFFFF"
)
# Expose every colour as its own variable (purple, pink, blue, ...).
list2env(as.list(base_colours), envir = environment())

# Ready-made combinations for 2- to 5-colour scales.
mycols2 <- c(blue, purple)
mycols3 <- c(purple, pink, blue)
mycols4 <- c(purple, pink, blue, blue, yellow)
mycols5 <- c(white, blue, purple)
# Full ordered palette that mypal() draws from.
allcols <- unname(
  base_colours[c("purple", "blue", "pink", "yellow", "lila", "brown", "grey")]
)


#' Return the first `nbcol` colours of the global `allcols` palette.
#'
#' @param nbcol Number of colours wanted (non-negative integer).
#' @return A character vector of length `nbcol`. Asking for more colours than
#'   `allcols` holds pads the result with `NA`; `nbcol = 0` returns an empty
#'   vector (the previous `1:nbcol` indexing returned one colour in that case).
mypal <- function(nbcol) {
  allcols[seq_len(nbcol)]
}

Introduction

Le cinéma ne dit pas autrement les choses, il dit autre chose.
The cinema does not say things differently, it says something else.

Éric Rohmer, French Director

First of all, the following analysis is purely subjective and is in no way representative of global consumption behaviour.
It is, however, representative of my cinema consumption since I was 20 years old. The data has been meticulously collected to arrive at this conclusion, which is a snapshot of my cinephilia at a given moment, with the biases that this entails: I am a young Frenchman who has been more or less influenced in his choice of films and who obviously has tastes that cannot be explained but that can be identified.

Overview of my Database

# Workbook holding both the rated films and the watch-list.
critique_path <- "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx"

# Watch-list sheet
ToSeeFilm <- read_excel(critique_path, sheet = "Film à voir")

# Rated films sheet, followed by some cleaning of the Saga column
CritiqueFilm <- read_excel(critique_path, sheet = "Notation")
is_batman <- CritiqueFilm$Saga == "Batman"
CritiqueFilm$Saga[is_batman] <- "DC"
is_dreamworks <- CritiqueFilm$`Maison de distribution` %in% "DreamWorks Animation"
CritiqueFilm$Saga[is_dreamworks] <- "DreamWorks"

# GlobalInfos <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",sheet = "Bilan")

# Merge of my two databases, keeping only rows that have a title
NamesFilm <- bind_rows(CritiqueFilm, ToSeeFilm)
NamesFilm <- NamesFilm[!is.na(NamesFilm$`Titre du film`), ]
# A film counts as seen once its scenario grade is filled in
NamesFilm$Seen <- !is.na(NamesFilm$Scénario)
# NamesFilm$Année <- as.numeric(format(NamesFilm$`Date de sortie`, format = "%Y"))
# NamesFilm$Décénie <- round(NamesFilm$Année/10,0)*10

# Where no cumulative grade exists, fall back on twice the press grade
no_cumulative <- is.na(NamesFilm$`Notes cummulées`)
NamesFilm$`Notes cummulées`[no_cumulative] <- NamesFilm$`Note Presse`[no_cumulative] * 2

# Release month as a number
NamesFilm$Mois <- as.numeric(format(NamesFilm$`Date de sortie`, "%m"))

# Non-French films that still lack an IMDB identifier
IMDB <- NamesFilm %>%
  filter(is.na(`IMDB ID`) & `Pays d'origine` != "France") %>%
  select(`English Title`, Année, Réalisateur, `IMDB ID`)

# Number of rated films
count_movies_seen <- length(CritiqueFilm$`Titre du film`)

# Number of films still to see
count_movies_tosee <- length(ToSeeFilm$`Titre du film`)

# Total number of films
count_total <- count_movies_seen + count_movies_tosee


# Best director according to our ratings (minimum 3 films)
# Best director according to the press (minimum 3 films)
# Director we rate highest relative to the press (minimum 3 films)
Director_table <- as.data.frame(table(CritiqueFilm$Réalisateur))
colnames(Director_table)[1] <- "Director"
Director_table <- Director_table[Director_table$Freq >= 3, ]

# Mean of one score column per retained director. vapply() also copes with an
# empty table, which the former `1:length(...)` loop did not.
director_mean <- function(score_col) {
  vapply(
    as.character(Director_table$Director),
    function(director) {
      rows <- which(CritiqueFilm$Réalisateur == director)
      mean(CritiqueFilm[[score_col]][rows], na.rm = TRUE)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
Director_table$`Note Presse` <- director_mean("Note Presse")
Director_table$`Nos notes` <- director_mean("Nos notes")

# Positive when our grade is above the press grade
Director_table$Surcote <- Director_table$`Nos notes` - Director_table$`Note Presse`

# Name of the first director holding the maximum of `values`.
# which.max() skips NA/NaN, so one director without any grade no longer turns
# the result into NA.
top_director <- function(values) {
  as.character(Director_table$Director[which.max(values)])[1]
}
Best_director_forme <- top_director(Director_table$`Nos notes`)
Best_director_forpresse <- top_director(Director_table$`Note Presse`)
Surcote_director <- top_director(Director_table$Surcote)


# Best actor (minimum 3 films)
# Second best actor (minimum 3 films)
# Third best actor (minimum 3 films)
# Most prolific actor

# Stack the three actor columns into one long table: one row per
# (film, credited actor) pair, with the film's grades alongside.
actor_cols <- c("Acteur 1", "Acteur 2", "Acteur 3")
film_cols <- c("Nos notes", "Grade", "Note Presse", "Emoji Pays", "Date de sortie")
actor_slice <- function(actor_col) {
  slice <- select(CritiqueFilm, all_of(c(actor_col, film_cols)))
  colnames(slice)[1] <- "Acteur"
  slice
}
Acteur_merge <- do.call(rbind, lapply(actor_cols, actor_slice))

# Number of appearances per actor, most frequent first
Acteur <- as.data.frame(table(Acteur_merge$Acteur))
colnames(Acteur) <- c("Acteur", "Freq")
Acteur <- Acteur[order(-Acteur$Freq), ]
Acteur_Max <- as.character(Acteur$Acteur)[1]

# Keep actors with at least 3 films, as stated in the report ("minimum 3
# films"); the previous `Freq > 3` filter silently required 4.
Acteur <- Acteur[Acteur$Freq >= 3, ]

# Mean of one score column per retained actor, rounded to one decimal
actor_mean <- function(score_col) {
  vapply(
    as.character(Acteur$Acteur),
    function(actor) {
      rows <- which(Acteur_merge$Acteur == actor)
      round(mean(Acteur_merge[[score_col]][rows], na.rm = TRUE), 1)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
Acteur$Notes <- actor_mean("Nos notes")
Acteur$Presse <- actor_mean("Note Presse")

# Rank on the sum of our grade and the press grade
Acteur$Total <- Acteur$Notes + Acteur$Presse
Acteur <- Acteur[order(-Acteur$Total), ]
# Always length 3; padded with NA when fewer than 3 actors qualify
Best_actor <- as.character(Acteur$Acteur[1:3])

# Best year according to the rated films
Year_data <- as.data.frame(table(CritiqueFilm$Année))
colnames(Year_data)[1] <- "Year"

# Mean cumulative grade per year. Years are compared as text because
# Year_data$Year is a factor. vapply() fills the column in one go and also
# works on an empty table, unlike the former `1:length(...)` loop.
Year_data$Note <- vapply(
  as.character(Year_data$Year),
  function(yr) {
    rows <- which(as.character(CritiqueFilm$Année) == yr)
    mean(CritiqueFilm$`Notes cummulées`[rows], na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Only years with at least 5 rated films compete
Year_data <- Year_data[Year_data$Freq >= 5, ]
# which() drops years whose mean is NaN instead of returning NA entries;
# ties are all kept.
best_year_rows <- which(Year_data$Note == max(Year_data$Note, na.rm = TRUE))
Best_year <- as.character(Year_data$Year[best_year_rows])

# Best month to go and see a film at the cinema in France
# Tabulating the vector (rather than a one-column data frame) guarantees the
# category column is called "Var1", which the code below relies on.
Month <- as.data.frame(table(CritiqueFilm$Mois))

# Mean of our grades per month
Month$Grade <- vapply(
  as.character(Month$Var1),
  function(mo) {
    rows <- which(as.character(CritiqueFilm$Mois) == mo)
    mean(CritiqueFilm$`Nos notes`[rows], na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Month$Var1 is a factor: go through as.character() to get the month number
# itself rather than the factor level code (they differ as soon as one month
# is missing from the data).
# NOTE(review): assumes CritiqueFilm$Mois holds month numbers 1-12 - confirm.
best_month_rows <- which(Month$Grade == max(Month$Grade, na.rm = TRUE))
Best_Month <- month.name[as.numeric(as.character(Month$Var1[best_month_rows]))]


# Best distribution company according to our grades
Distri <- select(CritiqueFilm, `Maison de distribution`, `Nos notes`)
Distri_table <- as.data.frame(table(Distri$`Maison de distribution`))
# Only companies with more than 3 rated films compete
Distri_table <- Distri_table[Distri_table$Freq > 3, ]

# Mean of our grades per company. vapply() also handles an empty table,
# unlike the former `1:length(...)` loop.
Distri_table$Note <- vapply(
  as.character(Distri_table$Var1),
  function(company) {
    rows <- which(Distri$`Maison de distribution` == company)
    mean(Distri$`Nos notes`[rows], na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)
# which() ignores companies whose mean is NaN; ties are all kept.
best_distri_rows <- which(Distri_table$Note == max(Distri_table$Note, na.rm = TRUE))
Best_distri <- as.character(Distri_table$Var1[best_distri_rows])

# Parent company with the highest share of the rated films

Distri <- CritiqueFilm %>% select(`Maison mère`, `Nos notes`)
parent_counts <- as.data.frame(table(Distri$`Maison mère`))
# The "France" entry is excluded, then companies are sorted by film count
not_france <- parent_counts$Var1 != "France"
Distri_table <- parent_counts[not_france, ]
Distri_table <- Distri_table[order(-Distri_table$Freq), ]

Most_Distri <- as.character(Distri_table$Var1[1])
# Share of the rated films, in percent, that belong to that company
films_of_top_parent <- sum(Most_Distri == CritiqueFilm$`Maison mère`, na.rm = TRUE)
Most_Distri_Percent <- round(films_of_top_parent * 100 / count_movies_seen, 1)

# Progress: rated films as a percentage of rated + still-to-see films
Total_percent <- round(
  100 * count_movies_seen / (count_movies_tosee + count_movies_seen),
  1
)


# Format a duration given in minutes as "<hours>h<minutes>", with the minutes
# zero-padded. Rounding is done on the total so that 59.6 minutes becomes
# "1h00" (the previous formatting could give "0h60" and dropped the leading
# zero, e.g. "1h5").
format_hours_minutes <- function(minutes) {
  total <- as.integer(round(minutes))
  sprintf("%dh%02d", total %/% 60L, total %% 60L)
}

# Average running time of a rated film (Durée is divided by 60 to get hours,
# i.e. it is treated as minutes)
Duration <- mean(CritiqueFilm$Durée, na.rm = TRUE)
Duration_txt <- format_hours_minutes(Duration)

# Days elapsed since the database was started
Beginning <- as.Date("2019-09-12")
Count_days <- as.numeric(Sys.Date() - Beginning)

# Average viewing time per day and average number of films per day
Duration_seen <- (Duration * count_movies_seen / Count_days)
Duration_txt_seen <- format_hours_minutes(Duration_seen)
Films_per_day <- round(count_movies_seen / Count_days, 2)

# Rate at which new films join the list, estimated from the rated films
# released in 2018 or 2019.
# NOTE(review): the divisor 360 is kept from the original; confirm which
# period it is meant to represent.
Filmtoaddparday <- sum(CritiqueFilm$Année == 2018 | CritiqueFilm$Année == 2019, na.rm = TRUE) / 360
# Days needed to clear the list at the current pace, without (Nb_day) and
# with (Nb_day2) new films being added
Nb_day <- count_movies_tosee * Duration / Duration_seen
Nb_day2 <- round(Nb_day + Nb_day * Filmtoaddparday, 0)

# Convert both horizons into projected end dates
Nb_day <- Sys.Date() + Nb_day
Nb_day2 <- Sys.Date() + Nb_day2

# Subtitle reused by the charts
Sub <- paste0("Based on ", count_movies_seen, " movies seen")

As a lifelong film enthusiast, I created a database in September 2019 (1139 days ago) allowing me to track the films I watch and to structure my cinephilia.
So I have seen 856 films in the last few years and I have a list of over 1328 films to see. This analysis is therefore evolving!
Who are my favourite directors? What are the best films according to me and according to the press? What kind of films are the most represented? Which actor is the most present in my filmography? All these questions will be answered in this report! I will start by giving you an overview of my film consumption and then go into more detail in the dedicated sections.

The recipe for a good film? Still unknown but if I had to summarize the 856 films I have seen, this is what I can say:

The best director according to our ratings (minimum 3 films) : Gore Verbinski
The best director according to the press (minimum 3 films) : Peter Jackson
The most underrated director (minimum 3 films) : Louis Leterrier
Best actor (minimum 3 films) : Viggo Mortensen
Second best actor (minimum 3 films) : Tim Allen
Third best actor (minimum 3 films) : Leonardo DiCaprio
Most prolific actor: Hugh Jackman
The best year according to the rated films : 1994
The best month to see a film in France: May
The best distribution company according to the scores : Pixar
Distribution company with the highest market share : Disney (25.2%)
Percentage of advancement : 39.2%
Average duration of a film : 1h51
Daily time spent watching films : 1h23 or 0.75 film per day
End date if no film is added to my list again : 2027-08-27
End date if I add films at the same rate as today : 2029-03-28

Explanation of my scoring system

In order to establish a ranking of films, actors, directors… I had to decide on some rating criteria that will allow me to evaluate the main elements that make up a film. So here are the 5 criteria I rate out of 5:

# Pool of candidate example films: grade "A", press grade of at least 4,
# Saga equal to "Saga", an English title, and not French.
# which() drops rows where the condition is NA instead of turning them into
# all-NA rows.
Explication_Sample <- CritiqueFilm[which(
  CritiqueFilm$Grade == "A" &
    CritiqueFilm$`Note Presse` >= 4 &
    CritiqueFilm$Saga == "Saga" &
    !is.na(CritiqueFilm$`English Title`) &
    CritiqueFilm$`Pays d'origine` != "France"
), ]
Explication_Sample <- select(Explication_Sample, `English Title`, Scénario, `Acteurs / Personnages`, `Ambiance / Concept`, `Aspect Visuel`, `Aspect Sonore`)
Explication_Sample <- as.data.frame(Explication_Sample)

# Randomly pick up to `n` titles graded 5 on `criterion`. Never asks for more
# titles than are available, so a small pool no longer raises an error.
draw_examples <- function(pool, criterion, n = 3) {
  candidates <- pool$`English Title`[which(pool[[criterion]] == 5)]
  candidates[sample.int(length(candidates), min(n, length(candidates)))]
}

# Remove already-used titles from the pool so no film illustrates two criteria.
drop_titles <- function(pool, titles) {
  pool[!(pool$`English Title` %in% titles), ]
}

# "A, B or C" (same wording as before for three titles).
join_examples <- function(titles) {
  k <- length(titles)
  if (k == 0) {
    return("")
  }
  if (k == 1) {
    return(titles)
  }
  paste0(paste(titles[-k], collapse = ", "), " or ", titles[k])
}

# For every criterion: draw, exclude from the pool, then build the sentence.
# The exclusion must use the individual titles; the scenario step used to
# paste them first, so its films were never removed from the pool.
picked <- draw_examples(Explication_Sample, "Scénario")
Explication_Sample <- drop_titles(Explication_Sample, picked)
Explication_Sample_Scenario <- join_examples(picked)

picked <- draw_examples(Explication_Sample, "Acteurs / Personnages")
Explication_Sample <- drop_titles(Explication_Sample, picked)
Explication_Sample_Acteur <- join_examples(picked)

picked <- draw_examples(Explication_Sample, "Ambiance / Concept")
Explication_Sample <- drop_titles(Explication_Sample, picked)
Explication_Sample_Ambiance <- join_examples(picked)

picked <- draw_examples(Explication_Sample, "Aspect Visuel")
Explication_Sample <- drop_titles(Explication_Sample, picked)
Explication_Sample_Visuel <- join_examples(picked)

picked <- draw_examples(Explication_Sample, "Aspect Sonore")
Explication_Sample <- drop_titles(Explication_Sample, picked)
Explication_Sample_Sonore <- join_examples(picked)

# Average grade for each of the five rating criteria, rounded to 2 decimals.
# The English labels and the workbook columns are listed in the same order.
criteria_labels <- c("Scenario","Actors and characters","Atmosphere and concept","Visual aspect","Sound aspect")
criteria_columns <- c("Scénario", "Acteurs / Personnages", "Ambiance / Concept", "Aspect Visuel", "Aspect Sonore")

criteria_means <- vapply(
  criteria_columns,
  function(column) mean(CritiqueFilm[[column]], na.rm = TRUE),
  numeric(1)
)

scoring_system <- data.frame(
  Categories = criteria_labels,
  Grade = round(unname(criteria_means), 2),
  # Numeric position of each category, used to draw the segments
  ID = 1:5,
  stringsAsFactors = FALSE
)

# Lollipop chart of the average grade per rating category.
category_order <- c("Scenario","Actors and characters","Atmosphere and concept","Visual aspect","Sound aspect")

scoring_theme <- theme(
  text = element_text(size = 12, family = "AvertaPE-Regular"),
  plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
  plot.caption = element_text(size = 10, color = blue),
  axis.line = element_line(colour = purple),
  panel.background = element_blank(),
  legend.background = element_blank(),
  legend.position = "bottom"
)

scoring_system_graph <- ggplot(scoring_system, aes(x = Categories, y = Grade)) +
  # Grey reference line at the mean of the five category averages
  geom_hline(yintercept = mean(scoring_system[, 2]), col = grey) +
  # Stems start at the lower axis limit (3)
  geom_segment(aes(x = ID, xend = ID, y = 3, yend = Grade), col = blue) +
  geom_point(size = 3, color = purple, fill = "white", shape = 21, stroke = 2) +
  geom_text(
    aes(label = Grade),
    size = 3,
    hjust = 0.5,
    vjust = -1.5,
    family = "AvertaPE-Regular",
    check_overlap = TRUE
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2), limits = category_order) +
  ylim(3, 4.5) +
  labs(title = "Average of grades per Categorie") +
  scoring_theme

scoring_system_graph

The scenario: Essential for a good film, it keeps us on the edge of our seats, makes us passionate, questions us and is in my opinion the most important. A film with an impeccable visual quality without a script will remain a bad film. Here are for example 3 films that I evaluated with an excellent script: 1917, Shutter Island or Forrest Gump
Actors and characters: This category is an indissociable part of the rating system and allows us to identify whether the casting is successful and therefore whether the actors are good and correspond perfectly to the character they play. This category is obviously rated higher than the others, since the actors contribute most to the credibility of a film and most of the time give their best, as in: Gone Girl, Interstellar or Hachi: A Dog’s Tale
Atmosphere and concept: Each film has its own universe that can transport us and sometimes we want to see more… or not! The atmosphere of the film allows us to stay hooked to the plot and to feel unique emotions. The concept allows innovation in an environment that we think is already saturated but we will see that many recent films have really new concepts like : The Grand Budapest Hotel, The Green Mile or The Prestige
Visual aspect: The aesthetics of the film is a central element. The visual aspect consists in evaluating the visual beauty of the film, its risk-taking, its camera movements, its editing, its special effects, its photography etc. Here are some films with an interesting visual aspect: Ready Player One, The Curious Case of Benjamin Button or Your Name.
Sound aspect: Finally, the sound aspect echoes the atmosphere of the film as it includes both the soundtrack and all the work done on sound, sound effects etc. to make it all coherent. Although the soundtrack has a central place in the evaluation of this criterion, some films enjoy quite incredible sound effects that sometimes absorb the musical theme. Here are 3 films with impeccable sound effects: Titanic, The Imitation Game or Whiplash

Top of my movies

# Top-rated films among those rated by "Théotime".
# filter() drops films whose `Noté par` is NA instead of creating all-NA rows.
TopFilms <- CritiqueFilm %>%
  filter(str_detect(`Noté par`, "Théotime"))

TopFilms <- select(TopFilms, `English Title`, Année, `Emoji Pays`, Réalisateur, `Notes cummulées`)
TopFilms <- TopFilms[order(-TopFilms$`Notes cummulées`), ]
colnames(TopFilms) <- c("Title", "Year", "Country", "Director", "Grade")

# Maximum number of films shown
Top <- 100
TopFilms <- head(TopFilms, Top)
TopFilms$Country[TopFilms$Country == "United States of America"] <- "USA"
# Rank follows the number of rows actually kept, so the table still builds
# when fewer than `Top` films are available.
TopFilms$Rank <- seq_len(nrow(TopFilms))
TopFilms <- TopFilms %>% select(Rank, everything())

# Scrollable HTML table; the Grade cell is shaded from blue to purple
TopFilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("c", "l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(6, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")

Rank	Title	Year	Country	Director	Grade
1	Forrest Gump	1994	🇺🇸	Robert Zemeckis	9.6
2	The Dark Knight	2008	🇺🇸	Christopher Nolan	9.5
3	The Lion King	1994	🇺🇸	Roger Allers	9.5
4	Joker	2019	🇺🇸	Todd Philips	9.5
5	The Green Mile	2000	🇺🇸	Frank Darabont	9.5
6	The Lord of the Rings: The Fellowship of the Ring	2001	🇺🇸	Peter Jackson	9.5
7	The Lord of the Rings: The Two Towers	2002	🇺🇸	Peter Jackson	9.5
8	The Lord of the Rings: The Return of the King	2003	🇺🇸	Peter Jackson	9.5
9	Pulp Fiction	1994	🇺🇸	Quentin Tarantino	9.5
10	1917	2020	🇬🇧	Sam Mendes	9.4
11	Bohemian Rhapsody	2018	🇺🇸	Bryan Singer	9.4
12	Dune	2021	🇺🇸	Denis Villeneuve	9.4
13	Interstellar	2014	🇺🇸	Christopher Nolan	9.4
14	Spider-Man: Into the Spider-Verse	2018	🇺🇸	Peter Ramsey	9.4
15	Zack Snyder’s Justice League	2021	🇺🇸	Zack Snyder	9.3
16	Soul	2020	🇺🇸	Pete Docter	9.3
17	Kingsman: The Secret Service	2015	🇺🇸	Matthew Vaughn	9.3
18	Léon: The Professional	1994	🇫🇷	Luc Besson	9.3
19	Slumdog Millionaire	2009	🇬🇧	Danny Boyle	9.3
20	Titanic	1998	🇺🇸	James Cameron	9.3
21	Toy Story 3	2010	🇺🇸	Lee Unkrich	9.2
22	How to Train Your Dragon	2010	🇺🇸	Dean DeBlois	9.2
23	Inception	2010	🇺🇸	Christopher Nolan	9.2
24	Jurassic Park	1993	🇺🇸	Steven Spielberg	9.2
25	Guardians of the Galaxy	2014	🇺🇸	James Gunn	9.2
26	Back to the Future	1985	🇺🇸	Robert Zemeckis	9.2
27	Star Wars: Episode III – Revenge of the Sith	2005	🇺🇸	George Lucas	9.2
28	Star Wars : Episode V – The Empire Strikes Back	1980	🇺🇸	Irvin Kershner	9.2
29	Star Wars: Episode VI – Return of the Jedi	1983	🇺🇸	Richard Marquand	9.2
30	Hacksaw Ridge	2016	🇺🇸	Mel Gibson	9.2
31	Coco	2017	🇺🇸	Lee Unkrich	9.1
32	Green Book	2018	🇺🇸	Peter Farrelly	9.1
33	Harry Potter and the Deathly Hallows: Part 2	2011	🇺🇸	David Yates	9.1
34	Skyfall	2012	🇬🇧	Sam Mendes	9.1
35	The Curious Case of Benjamin Button	2009	🇺🇸	David Fincher	9.1
36	Rise of the Planet of the Apes	2011	🇺🇸	Rupert Wyatt	9.1
37	The Pianist	2002	🇫🇷	Roman Polanski	9.1
38	The Shawshank Redemption	1995	🇺🇸	Frank Darabont	9.1
39	Guardians of the Galaxy Vol. 2	2017	🇺🇸	James Gunn	9.1
40	Avengers: Infinity War	2018	🇺🇸	Frères Russo	9.1
41	Spider-Man: No Way Home	2021	🇺🇸	Jon Watts	9.1
42	Parasite	2019	🇰🇷	Bong Joon-ho	9.1
43	Rogue One: A Star Wars Story	2016	🇺🇸	Gareth Edwards	9.1
44	The Incredibles	2004	🇺🇸	Brad Bird	9.0
45	Incredibles 2	2018	🇺🇸	Brad Bird	9.0
46	The Great Gatsby	2013	🇺🇸	Baz Luhrmann	9.0
47	Casino Royale	2006	🇬🇧	Martin Campbell	9.0
48	Kick-Ass	2010	🇺🇸	Matthew Vaughn	9.0
49	Life of Pi	2012	🇺🇸	Ang Lee	9.0
50	Ford v Ferrari	2019	🇺🇸	James Mangold	9.0
51	Marvel’s The Avengers	2012	🇺🇸	Joss Whedon	9.0
52	Avengers: Endgame	2019	🇺🇸	Frères Russo	9.0
53	Pirates of the Caribbean: The Curse of the Black Pearl	2003	🇺🇸	Gore Verbinski	9.0
54	Spider-Man	2002	🇺🇸	Sam Raimi	9.0
55	Your Name.	2016	🇯🇵	Makoto Shinkai	9.0
56	X-Men: Days of Future Past	2014	🇺🇸	Bryan Singer	9.0
57	War for the Planet of the Apes	2017	🇺🇸	Matt Reeves	8.9
58	Limitless	2011	🇺🇸	Neil Burger	8.9
59	Sherlock Holmes	2010	🇺🇸	Guy Ritchie	8.9
60	Star Trek Into Darkness	2013	🇺🇸	J. J. Abrams	8.9
61	Knives Out	2019	🇺🇸	Rian Johnson	8.9
62	Aladdin	1992	🇺🇸	John Musker et Ron Clements	8.9
63	Toy Story	1996	🇺🇸	John Lasseter	8.9
64	WALL‐E	2008	🇺🇸	Andrew Stanton	8.9
65	The Prestige	2006	🇺🇸	Christopher Nolan	8.9
66	The Batman	2022	🇺🇸	Matt Reeves	8.9
67	Charlie and the Chocolate Factory	2005	🇺🇸	Tim Burton	8.8
68	Monsters, Inc.	2002	🇺🇸	Pete Docter	8.8
69	Zootopia	2016	🇺🇸	Byron Howard	8.8
70	How to Train Your Dragon 2	2014	🇺🇸	Dean DeBlois	8.8
71	Gladiator	2000	🇺🇸	Ridley Scott	8.8
72	Harry Potter and the Deathly Hallows: Part 1	2010	🇺🇸	David Yates	8.8
73	No Time to Die	2021	🇬🇧	Cary Joji Fukunaga	8.8
74	Kingsman: The Golden Circle	2017	🇺🇸	Matthew Vaughn	8.8
75	Spirited Away	2002	🇯🇵	Hayao Miyazaki	8.8
76	Mad Max: Fury Road	2015	🇦🇺	George Miller	8.8
77	Pirates of the Caribbean: Dead Man’s Chest	2006	🇺🇸	Gore Verbinski	8.8
78	Back to the Future Part II	1989	🇺🇸	Robert Zemeckis	8.8
79	Shrek	2001	🇺🇸	Andrew Adamson	8.8
80	Spider-Man 2	2004	🇺🇸	Sam Raimi	8.8
81	Star Wars: Episode II – Attack of the Clones	2002	🇺🇸	George Lucas	8.8
82	Star Wars: Episode IV – A New Hope	1977	🇺🇸	George Lucas	8.8
83	The Grand Budapest Hotel	2014	🇺🇸	Wes Anderson	8.8
84	Whiplash	2014	🇺🇸	Damien Chazelle	8.8
85	Ratatouille	2007	🇺🇸	Brad Bird	8.7
86	Wreck‐It Ralph	2012	🇺🇸	Rich Moore	8.7
87	How to Train Your Dragon: The Hidden World	2019	🇺🇸	Dean DeBlois	8.7
88	Dunkirk	2017	🇺🇸	Christopher Nolan	8.7
89	Gone Girl	2014	🇺🇸	David Fincher	8.7
90	The Imitation Game	2014	🇺🇸	Morten Tyldum	8.7
91	Klaus	2019	🇪🇸	Sergio Pablos	8.7
92	Dawn of the Planet of the Apes	2014	🇺🇸	Matt Reeves	8.7
93	The Lion King	2019	🇺🇸	Jon Favreau	8.7
94	Le Visiteur du futur	2022	🇫🇷	François Descraques	8.7
95	The Matrix	1999	🇺🇸	Les Wachowski	8.7
96	Pirates of the Caribbean: At World’s End	2007	🇺🇸	Gore Verbinski	8.7
97	Play	2020	🇫🇷	Anthony Marciano	8.7
98	Back to the Future Part III	1990	🇺🇸	Robert Zemeckis	8.7
99	The Shining	1980	🇺🇸	Stanley Kubrick	8.7
100	Shutter Island	2010	🇺🇸	Martin Scorsese	8.7

My consumption over time

# Number of films per release year, over the full span of the rated films
YearMin <- min(CritiqueFilm$Année, na.rm = TRUE)
YearMax <- max(CritiqueFilm$Année, na.rm = TRUE)

# One row per year; the column is named "YearMin.YearMax" by data.frame()
Year_df <- data.frame(YearMin:YearMax)

# Count how many entries of `years` fall on each row's year (NA ignored)
count_per_year <- function(years) {
  vapply(
    Year_df$YearMin.YearMax,
    function(yr) sum(as.numeric(years == yr), na.rm = TRUE),
    numeric(1)
  )
}
Year_df$CritiqueFilm <- count_per_year(CritiqueFilm$Année)
Year_df$NamesFilm <- count_per_year(NamesFilm$Année)
# Computed once, after both counts exist (it used to be recomputed on every
# loop iteration).
# NOTE(review): NamesFilm is built from CritiqueFilm plus the watch-list, so
# this sum counts rated films twice - confirm that is intended.
Year_df$Total <- Year_df$CritiqueFilm + Year_df$NamesFilm
Year_df <- as.data.frame(Year_df)

# Heat-map of our grades by release year, limited to the window below
YearMin_graph <- 1998
YearMax_graph <- 2022

Year_Grade <- CritiqueFilm %>% select(Année, `Nos notes`)
in_window <- Year_Grade$Année >= YearMin_graph & Year_Grade$Année <= YearMax_graph
Year_Grade <- Year_Grade[in_window, ]

# Summary figures: mean grade of the 2019 films and lowest grade in the window
grades_2019 <- Year_Grade$`Nos notes`[Year_Grade$Année == 2019]
Year_Grade_2019 <- round(mean(grades_2019, na.rm = TRUE), 1)
Year_Grade_Min <- round(min(Year_Grade$`Nos notes`, na.rm = TRUE), 1)

year_grade_theme <- theme(
  text = element_text(size = 12, family = "AvertaPE-Regular"),
  plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
  plot.caption = element_text(size = 10, color = blue),
  axis.line = element_line(colour = purple),
  panel.background = element_blank(),
  legend.background = element_blank(),
  legend.position = "none"
)

Year_Grade_graph <- ggplot(Year_Grade, aes(Année, `Nos notes`)) +
  # One tile per year and per third of a grade point
  geom_bin2d(binwidth = c(1, 1 / 3)) +
  scale_x_continuous(breaks = seq(YearMin_graph, YearMax_graph, 2)) +
  # Linear trend of the grade over the years
  geom_smooth(method = "lm", col = white, se = FALSE) +
  scale_fill_gradient(low = purple, high = blue) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade", x = "Year", fill = "Count"
  ) +
  year_grade_theme
Year_Grade_graph

This graph represents my film consumption since 1998, the year I was born. The lighter the colour, the more films I have seen with that rating in that period.
Since 2019, when I created my database, we see a greater diversity of bad and good films, with a tendency to be average overall. By contrast, the years before the creation of my file have higher average scores because they correspond to good films that “must” be seen.
Eventually, the aim will be to see more films over this period to complete each square from 1 to 5 in score and see a real trend that I imagine is decreasing.

# Films per decade, split into seen / still to see.
# The cross-table has one row per decade and the columns FALSE / TRUE of `Seen`.
Decades <- as.data.frame.matrix(table(select(NamesFilm, Décénie, Seen)))
colnames(Decades) <- c("To see", "Seen")
Decades$Decades <- as.numeric(rownames(Decades))
Decades$Total <- as.numeric(Decades$`To see` + Decades$Seen)

Decades_graph <- ggplot(Decades) +
  # Total area drawn first, the "seen" area is layered on top of it
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Seen, fill = "Movies seen")) +
  # geom_label() has no `check_overlap` parameter (only geom_text() does), so
  # it is not passed here.
  geom_label(aes(x = Decades, y = Seen, label = paste(Seen)),
             fill = purple,
             colour = white) +
  # Colours are tied to the legend entries by name rather than by position
  scale_fill_manual(values = c("Movies seen" = purple, "Movies to see" = blue)) +
  scale_x_continuous(breaks = seq(1930, 2020, 10)) +
  labs(title = "Volume of films to be seen and films seen\naccording to recommendations",
       y = "Number of films", x = "Decade",
       fill = "Legend") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "bottom",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Decades_graph

# Our grade against the press grade, labelling the biggest disagreements.
# NOTE: this overwrites Différence with its absolute value for the rest of
# the script.
CritiqueFilm$Différence <- CritiqueFilm$Différence %>% abs()

# Films whose grade differs from the press by more than 1.4 points
TopDiff <- filter(CritiqueFilm, Différence > 1.4)

ggplot(CritiqueFilm, aes(`Note Presse`, `Nos notes`)) +
  geom_hex(binwidth = c(.2, .33), color = purple) +
  geom_smooth(col = pink, se = FALSE, method = "lm") +
  # Diagonal y = x: points above it are films we rate higher than the press
  geom_abline(intercept = 0, slope = 1, color = grey) +
  # The label is mapped from the layer's own data; referring to
  # `TopDiff$...` inside aes() bypasses ggplot2's data handling.
  geom_label_repel(data = TopDiff, aes(label = `English Title`),
                   vjust = "inward", hjust = "inward",
                   family = "AvertaPE-Regular",
                   size = 8 / .pt) +
  scale_fill_gradient(low = purple, high = blue) +
  xlim(0.8, 5.3) +
  ylim(0.8, 5.3) +
  labs(title = "Rating of the film compared to the press ratings",
       subtitle = "Trend of overnotting") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))

My consumption

# Viewing dates with their week / day / month numbers, plus a flag telling
# whether the viewing happened within the last 365 days.
Date_Evolution <- CritiqueFilm %>%
  select(Date = `Dernier visionnage`) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  mutate(Week = format(Date, format = "%V") %>% as.numeric(),
         Day = format(Date, format = "%d") %>% as.numeric(),
         Month = format(Date, format = "%m") %>% as.numeric(),
         LastYear = Date >= (Sys.time() - (365 * 24 * 60 * 60)))

# Shift the week numbers by the current week number and wrap values above 52
# back into 1..52, so that the axis becomes a sliding year.
current_week <- format(Sys.time(), "%V") %>% as.numeric()
Date_Evolution$Week <- Date_Evolution$Week + 53 - current_week
wrapped <- Date_Evolution$Week > 52
Date_Evolution$Week[wrapped] <- Date_Evolution$Week[wrapped] - 52
Date_Evolution$Week <- round(Date_Evolution$Week, 0)

# Number of viewings per (week, LastYear) combination
Date_Evolution_table <- Date_Evolution %>% select(Week, LastYear) %>% table() %>% as.data.frame()

ggplot(Date_Evolution_table, aes(x = Week, y = Freq, group = LastYear, color = LastYear)) +
  # Smoothed weekly counts (B-spline basis with 7 degrees of freedom)
  geom_smooth(method = lm, formula = y ~ splines::bs(x, 7), se = FALSE) +
  labs(title = "Identify a decrease and gaps in my consumption",
       subtitle = "Film consumption over a year by week",
       color = "Timeline",
       x = "Week", y = "Count") +
  # `labels` is spelled out in full instead of relying on partial matching
  scale_color_manual(values = c(white, yellow), labels = c("Global", "This year")) +
  # The deprecated panel.margin.x / panel.margin.y entries (set to NULL, i.e.
  # without effect) are no longer passed to theme().
  theme(text = element_text(size = 12, family = "AvertaPE-Regular", colour = white),
        title = element_text(colour = white),
        panel.background = element_rect(fill = purple),
        plot.background = element_rect(fill = purple, color = purple),
        panel.grid.major = element_line(colour = purple),
        panel.grid.minor = element_line(colour = purple),
        panel.border = element_blank(),
        legend.text = element_text(colour = white),
        legend.title = element_text(colour = white),
        legend.position = "right",
        legend.background = element_blank(),
        legend.key = element_blank(),
        axis.text = element_text(colour = white),
        axis.text.x = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = white),
        plot.caption = element_text(size = 10, color = blue))

Among the annual objectives, film consumption is central and must be more or less stable to achieve them. These curves allow us to identify the periodicity of this consumption according to the weeks on a sliding year with the current month on the right. Keeping the current year’s curve above the overall curve may be a priority to complete my cinephilia and achieve my goals. This filmography is a race against time and can be optimised by segmenting the films to see. Each must-see film is scored from 0 to 100% where 100 is the highest level of recommendation. Few are above 90% and can be considered a priority. The Academy Awards can also be an indicator of “quality” but more importantly of visibility, highlighting a variety of films although this selection is heavily influenced. Despite this sectorisation, the list of films to be seen is getting longer as well as shorter, but with a constant viewing frequency of one film per day, the list should be completed.

# Four headline figures shown as a 2-row banner (figure on top, caption below)

# Films with a recommendation score above 0.9
n_high_reco <- NamesFilm %>% filter(Reco > 0.9) %>% nrow()

# Films whose Source mentions "#Oscar"
n_oscar <- NamesFilm %>% filter(str_detect(Source, "#Oscar")) %>% nrow()

# Total running time of the unseen films (Durée summed, divided by 60)
hours_to_see <- paste0(round((NamesFilm$Durée[NamesFilm$Seen == FALSE] %>% sum(na.rm = TRUE)) / 60, 0), "h")

# Films seen during the last month. The viewing date is converted with
# as.Date() so that both sides of the comparison have the same class.
# NOTE(review): assumes `Dernier visionnage` is read as a date or date-time
# column - confirm against the workbook.
n_last_month <- NamesFilm %>%
  filter(Seen == TRUE, as.Date(`Dernier visionnage`) > (Sys.Date() %m-% months(1))) %>%
  nrow()

Table_Duration <- data.frame(
  Data = c(n_high_reco, n_oscar, hours_to_see, n_last_month),
  Caption = c(
    "movies with a recommendation higher than 90%",
    "Academy Awards nominated films on my must-see list",
    "cumulative duration of the films to be seen",
    "films seen this past month"
  ),
  stringsAsFactors = FALSE
)

# Transposed so that each figure becomes a column
Table_Duration %>%
  t() %>%
  as.data.frame() %>%
  kable(escape = FALSE, align = c(rep("c", 10)), col.names = NULL, row.names = FALSE, booktabs = TRUE) %>%
  kable_styling(full_width = TRUE) %>%
  column_spec(1:4, width = "30em") %>%
  row_spec(1, bold = TRUE, color = yellow, font_size = 30) %>%
  row_spec(2, bold = TRUE, color = white)

13	208	2139h	11
movies with a recommendation higher than 90%	Academy Awards nominated films on my must-see list	cumulative duration of the films to be seen	films seen this past month

Directors

# Director league table: one row per director with at least three reviewed
# films, with mean press grade, mean personal grade, country flags, the
# difference and the sum of both means, ranked by that sum.
DirectorTop <- 40 # NOTE(review): unused in this section; kept in case later code reads it
DirectorHead <- as.data.frame(table(CritiqueFilm$Réalisateur), stringsAsFactors = FALSE)
colnames(DirectorHead) <- c("Director", "Freq")
DirectorHead <- DirectorHead[order(-DirectorHead$Freq), ]
DirectorHead <- DirectorHead[DirectorHead$Freq >= 3, ]
#DirectorHead <- head(DirectorList,DirectorTop)

# Row indices of each director's films; which() drops films with a missing
# director instead of producing all-NA rows.
director_rows <- lapply(DirectorHead$Director, function(director) {
  which(CritiqueFilm$Réalisateur == director)
})

DirectorHead$Presse <- vapply(director_rows, function(rows) {
  round(mean(CritiqueFilm$`Note Presse`[rows], na.rm = TRUE), 1)
}, numeric(1))
DirectorHead$OurGrades <- vapply(director_rows, function(rows) {
  round(mean(CritiqueFilm$`Nos notes`[rows], na.rm = TRUE), 1)
}, numeric(1))
# Concatenate every distinct flag; missing values are dropped before pasting
# rather than string-replaced afterwards, and there is no cap on the number of flags.
DirectorHead$Countries <- vapply(director_rows, function(rows) {
  flags <- unique(CritiqueFilm$`Emoji Pays`[rows])
  paste(flags[!is.na(flags)], collapse = "")
}, character(1))

DirectorHead$Diff <- DirectorHead$OurGrades - DirectorHead$Presse
DirectorHead$Total <- DirectorHead$Presse + DirectorHead$OurGrades
DirectorHead <- DirectorHead[order(-DirectorHead$Total), ]
rownames(DirectorHead) <- NULL
DirectorHead$Rank <- seq_len(nrow(DirectorHead))

DirectorHead <- select(DirectorHead, Rank, Director, Countries, Freq, OurGrades, Presse, Diff, Total)

# Scrollable HTML table of the 50 best-ranked directors. The colour tile on
# `Total` is computed over every director before the table is truncated.
DirectorHead %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(n = 50) %>%
  kable(
    escape = FALSE,
    align = c("l", "l", rep("c", 6))
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(8, bold = TRUE, color = white) %>%
  scroll_box(height = "400px", width = "100%")

Rank	Director	Countries	Freq	OurGrades	Presse	Diff	Total
1	Peter Jackson	🇺🇸	4	4.8	4.3	0.5	9.1
2	Christopher Nolan	🇺🇸	7	4.7	4.2	0.5	8.9
3	Brad Bird	🇺🇸	3	4.7	4.2	0.5	8.9
4	George Lucas	🇺🇸	4	4.8	4.1	0.7	8.9
5	Dean DeBlois	🇺🇸	3	4.6	4.3	0.3	8.9
6	Matt Reeves	🇺🇸	3	4.8	4.1	0.7	8.9
7	Matthew Vaughn	🇺🇸	5	4.8	4.0	0.8	8.8
8	Pete Docter	🇺🇸	4	4.5	4.3	0.2	8.8
9	Gore Verbinski	🇺🇸	3	4.8	4.0	0.8	8.8
10	Quentin Tarantino	🇺🇸	3	4.7	4.1	0.6	8.8
11	Sam Mendes	🇬🇧	3	4.8	4.0	0.8	8.8
12	Andrew Stanton	🇺🇸	3	4.5	4.2	0.3	8.7
13	Martin Scorsese	🇺🇸	4	4.4	4.2	0.2	8.6
14	Sam Raimi	🇺🇸	4	4.8	3.8	1.0	8.6
15	Guy Ritchie	🇺🇸	5	4.5	3.9	0.6	8.4
16	Bong Joon-ho	🇰🇷	3	4.4	4.0	0.4	8.4
17	David Yates	🇺🇸	7	4.6	3.8	0.8	8.4
18	Frères Russo	🇺🇸	4	4.3	4.1	0.2	8.4
19	J. J. Abrams	🇺🇸	4	4.6	3.7	0.9	8.3
20	Bryan Singer	🇺🇸	5	4.3	3.9	0.4	8.2
21	David Fincher	🇺🇸	4	4.2	4.0	0.2	8.2
22	John Lasseter	🇺🇸	4	4.3	3.9	0.4	8.2
23	Wes Anderson	🇺🇸	4	4.3	3.9	0.4	8.2
24	Danny Boyle	🇬🇧🇺🇸	3	4.4	3.8	0.6	8.2
25	Steven Spielberg	🇺🇸	6	4.2	3.9	0.3	8.1
26	Clint Eastwood	🇺🇸	3	4.0	4.1	-0.1	8.1
27	Jon Watts	🇺🇸	3	4.2	3.9	0.3	8.1
28	James Mangold	🇺🇸	5	4.1	3.9	0.2	8.0
29	Rian Johnson	🇺🇸	3	4.4	3.6	0.8	8.0
30	Zack Snyder	🇺🇸	7	4.3	3.6	0.7	7.9
31	Ridley Scott	🇺🇸	6	3.9	4.0	-0.1	7.9
32	George Miller	🇺🇸🇦🇺	3	4.4	3.5	0.9	7.9
33	Robert Zemeckis	🇺🇸	8	4.1	3.8	0.3	7.9
34	James Gunn	🇺🇸	4	4.1	3.8	0.3	7.9
35	Gary Trousdale et Kirk Wise	🇺🇸	3	4.1	3.8	0.3	7.9
36	Hayao Miyazaki	🇯🇵	5	3.6	4.2	-0.6	7.8
37	Francis Lawrence	🇺🇸	4	4.2	3.6	0.6	7.8
38	Ang Lee	🇺🇸	3	4.2	3.6	0.6	7.8
39	Chad Stahelski	🇺🇸	3	4.2	3.6	0.6	7.8
40	Marc Webb	🇺🇸	3	4.1	3.7	0.4	7.8
41	Tim Burton	🇺🇸	9	3.9	3.8	0.1	7.7
42	Andrew Adamson	🇺🇸	4	4.1	3.6	0.5	7.7
43	Pierre Coffin	🇺🇸	3	3.9	3.8	0.1	7.7
44	John Musker et Ron Clements	🇺🇸	5	3.7	3.9	-0.2	7.6
45	Tom McGrath	🇺🇸	5	4.1	3.5	0.6	7.6
46	Carlos Saldanha	🇺🇸	4	3.8	3.8	0.0	7.6
47	Philippe Lacheau	🇫🇷	4	3.8	3.8	0.0	7.6
48	Jennifer Yuh Nelson	🇺🇸	3	3.8	3.7	0.1	7.5
49	Taika Waititi	🇺🇸	3	4.0	3.5	0.5	7.5
50	Jon Favreau	🇺🇸	4	3.9	3.5	0.4	7.4

# Scatter plot of directors: mean personal grade (x) against number of films
# (y). Points and labels are coloured by whether the director is above the
# overall mean grade, which is marked by a vertical line and a numeric label.
Director_graph <- ggplot(DirectorHead, aes(x = OurGrades, y = Freq)) +
  geom_vline(
    xintercept = mean(DirectorHead$OurGrades, na.rm = TRUE),
    col = grey
  ) +
  geom_point(aes(size = Freq, colour = OurGrades > mean(OurGrades, na.rm = TRUE))) +
  scale_size_continuous(range = c(0.5, 5)) +
  xlim(min(DirectorHead$OurGrades, na.rm = TRUE), 5.5) +
  geom_smooth(col = pink, method = lm, formula = y ~ splines::bs(x, 2), se = FALSE) +
  geom_text(
    aes(
      label = paste0(Director, ": ", OurGrades),
      colour = OurGrades > mean(OurGrades, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  # The mean label is a single annotation; a geom_text layer would redraw the
  # same string once per data row on top of itself.
  annotate(
    "text",
    x = mean(DirectorHead$OurGrades, na.rm = TRUE) - 0.1,
    y = max(DirectorHead$Freq, na.rm = TRUE) + 1,
    label = round(mean(DirectorHead$OurGrades, na.rm = TRUE), 2),
    colour = grey,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  scale_color_manual(values = mypal(2)) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  ) +
  # NOTE(review): the subtitle is the literal text "Sub", whereas the genre
  # chart passes a variable named `Sub` — confirm which one is intended.
  labs(
    title = "Directors by volume and score",
    subtitle = "Sub",
    y = "Frequency", x = "Grade"
  )
Director_graph

# Per-director summary of cumulative grades (best, worst, rounded mean, film
# count) for the 15 directors with the most films, sorted by mean grade.
# Ties on the film count keep the order in which directors first appear in
# `CritiqueFilm`.
AListed <- CritiqueFilm %>%
  select(Réalisateur, Notes = `Notes cummulées`) %>%
  group_by(Réalisateur) %>%
  mutate(
    Max = max(Notes),
    Min = min(Notes),
    Mean = round(mean(Notes), 1),
    Count = n()
  ) %>%
  # Drop the grouping before row-wise verbs so the result is a plain tibble
  # and later steps do not silently operate per director.
  ungroup() %>%
  select(-Notes) %>%
  distinct() %>%
  arrange(desc(Count)) %>%
  head(15) %>%
  arrange(desc(Mean))

# Dumbbell chart: for each director a segment from the worst to the best
# cumulative grade, with the mean as a large dot carrying the film count.
ggplot(AListed, aes(y = Réalisateur)) +
  geom_segment(aes(x = Min, xend = Max, y = Réalisateur, yend = Réalisateur), color = "grey", size = .5) +
  geom_point(aes(x = Max, color = "Max"), size = 2) +
  geom_point(aes(x = Min, color = "Min"), size = 2) +
  geom_point(aes(x = Mean), color = yellow, size = 7) +
  geom_text(aes(x = Mean, label = Count), col = purple, family = "AvertaPE-Black") +
  scale_y_discrete(limits = rev(AListed$Réalisateur)) +
  # Breaks are given explicitly so each label is attached to the right key.
  # Without them the keys sort alphabetically ("Max", "Min") and unnamed
  # labels are applied in that order, swapping "Minimum" and "Maximum".
  scale_color_manual(
    values = c("Min" = pink, "Max" = blue),
    breaks = c("Min", "Max"),
    labels = c("Minimum", "Maximum")
  ) +
  labs(
    title = "Director ratings with range between worst and best film",
    #subtitle = "Test",
    x = "Grades", y = NULL,
    color = "Grades",
    caption = "Source : Critique Films"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )

Genre

# Genre frequency table: split the " / "-separated genre field, count each
# genre, sort by decreasing count, then rescale the count to
# round(1 + n / 10), which is the value used for word sizes and the y axis.
Genre <- CritiqueFilm$Genre
Genre <- unlist(strsplit(Genre, " / "))
Genre <- as.data.frame(table(Genre))
Genre <- Genre[order(-Genre$Freq), ]
Genre$Freq <- round(1 + Genre$Freq / 10, 0)

# Long table of (genre, personal grade): each film contributes one row for
# its first genre and one for its second.
Genre1 <- select(CritiqueFilm, `Genre 1`, `Nos notes`)
Genre2 <- select(CritiqueFilm, `Genre 2`, `Nos notes`)
colnames(Genre2) <- colnames(Genre1) <- c("Genre", "Note")
Genre_merge <- rbind(Genre1, Genre2)

# Mean personal grade per genre, rounded to one decimal. Genres that never
# appear as first or second genre get NaN.
Genre$Notes <- vapply(
  as.character(Genre$Genre),
  function(genre) {
    round(mean(Genre_merge$Note[which(Genre_merge$Genre == genre)], na.rm = TRUE), 1)
  },
  numeric(1),
  USE.NAMES = FALSE
)


# Word cloud of genres sized by the rescaled frequency; words are laid out
# most frequent first and never rotated.
wordcloud(
  words = Genre$Genre,
  freq = Genre$Freq,
  min.freq = 1,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0,
  colors = rev(mycols3),
  family = "AvertaPE-Black"
)

# Keep only genres whose rescaled frequency exceeds 1. The filter is written
# as a condition so it does not depend on `Genre` being sorted.
Genre <- Genre[which(Genre$Freq > 1), ] %>% as.data.frame()

# Scatter plot of genres: mean personal grade (x) against rescaled frequency
# (y), coloured by whether the genre is above the mean grade.
Genre_graph <- ggplot(Genre, aes(x = Notes, y = Freq)) +
  # na.rm guards against genres without any grade (NaN mean), which would
  # otherwise make both limits NaN.
  xlim(min(Genre$Notes, na.rm = TRUE), max(Genre$Notes, na.rm = TRUE) + 0.3) +
  geom_vline(
    xintercept = mean(Genre$Notes, na.rm = TRUE),
    col = grey
  ) +
  geom_point(aes(colour = Notes > mean(Notes, na.rm = TRUE))) + # conditional colour
  geom_smooth(col = pink, method = lm, formula = y ~ splines::bs(x, 2), se = FALSE) +
  geom_text(
    aes(label = paste0(Genre, ": ", Notes), colour = Notes > mean(Notes, na.rm = TRUE)),
    hjust = -0.1,
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  # Single annotation for the mean value instead of one stacked copy per row.
  annotate(
    "text",
    x = mean(Genre$Notes, na.rm = TRUE) - 0.05,
    y = max(Genre$Freq, na.rm = TRUE) + 1,
    label = round(mean(Genre$Notes, na.rm = TRUE), 2),
    colour = grey,
    vjust = 0,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  scale_color_manual(values = mypal(2)) +
  # `Sub` is a variable defined outside this section.
  labs(
    title = "Genre by volume and score",
    subtitle = Sub,
    y = "Frequency", x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Genre_graph

# Count films per (grade, genre) for grades "A" and "E" only, using both the
# first and second genre of each film. Counts are capped at 75 and then
# rescaled with BBmisc's range normalisation.
Genre_radar <- bind_rows(
  select(CritiqueFilm, Grade, Genre = `Genre 1`),
  select(CritiqueFilm, Grade, Genre = `Genre 2`)
) %>%
  filter(!is.na(Genre), Grade %in% c("A", "E")) %>%
  count(Grade, Genre, name = "Count") %>%
  arrange(desc(Count)) %>%
  mutate(
    Count = pmin(Count, 75),
    Count = BBmisc::normalize(Count, method = "range")
  )

# The ten genres with the largest summed normalised count across both grades.
Top_Genre <- Genre_radar %>%
  group_by(Genre) %>%
  summarise(Sum = sum(Count)) %>%
  arrange(desc(Sum)) %>%
  head(10) %>%
  pull(Genre)

Genre_radar <- Genre_radar %>%
  filter(Genre %in% Top_Genre)

# Wide layout expected by ggradar: one row per grade, first column is the
# group label, then one column per genre in `Top_Genre` order.
Skill_radar <- xtabs(formula = Count ~ Grade + Genre, data = Genre_radar) %>%
  as.data.frame.matrix()

Skill_radar <- Skill_radar %>%
  rownames_to_column("Grade") %>%
  # all_of() marks Top_Genre as an external character vector of column names.
  select(Grade, all_of(as.character(Top_Genre)))

library(ggradar)

# Radar chart with one polygon per grade row of `Skill_radar` across the
# selected genres.
Skill_radar_graph <- ggradar(
  Skill_radar,
  grid.label.size = 4,    # size of the grid annotations (0%, 50%, ...)
  axis.label.size = 3.2,
  group.point.size = 3,   # size of the vertex points
  group.colours = c(blue, pink)
) +
  labs(
    title = "Genre comparison between A-Listed and E-Listed",
    caption = "Source : Critique Films"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    legend.position = c(-0.1, 0.2),
    legend.justification = "left",
    legend.text = element_text(size = 10),
    legend.key = element_rect(fill = NA, color = NA),
    legend.background = element_blank()
  )

Skill_radar_graph

Actors

# Sort actors by decreasing number of films; the tables below rely on this order.
Acteur <- Acteur[order(-Acteur$Freq), ]

# Scatter plot of actors: mean grade (x) against number of films (y),
# coloured by whether the actor is above the overall mean grade.
Acteur_graph <- ggplot(Acteur, aes(x = Notes, y = Freq)) +
  geom_vline(
    xintercept = mean(Acteur$Notes, na.rm = TRUE),
    col = grey
  ) +
  # Single annotation for the mean value instead of one stacked copy per row.
  annotate(
    "text",
    x = mean(Acteur$Notes, na.rm = TRUE) - 0.1,
    y = max(Acteur$Freq, na.rm = TRUE) + 1,
    label = round(mean(Acteur$Notes, na.rm = TRUE), 2),
    colour = grey,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  geom_smooth(col = pink, method = lm, formula = y ~ splines::bs(x, 3), se = FALSE) +
  geom_point(aes(size = Freq, colour = Notes > mean(Notes, na.rm = TRUE))) +
  scale_size_continuous(range = c(0.5, 5)) +
  xlim(min(Acteur$Notes, na.rm = TRUE), 6) +
  geom_text(
    aes(label = paste(Acteur, Notes), colour = Notes > mean(Notes, na.rm = TRUE)),
    hjust = -0.1,
    vjust = -0.5,
    family = "AvertaPE-Regular",
    size = 9 / .pt,
    check_overlap = TRUE
  ) +
  scale_color_manual(values = mycols2) +
  labs(
    title = "Actor with the best average according to their frequency",
    subtitle = "",
    y = "Frequency", x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Acteur_graph

# Rank actors by their current row order and reset the row names to match.
rownames(Acteur) <- NULL
Acteur$Rank <- seq_len(nrow(Acteur))

# Concatenate the distinct country flags found on each actor's rows of
# `Acteur_merge`. Missing flags are dropped before pasting, and there is no
# cap on the number of flags.
Acteur$Countries <- vapply(
  as.character(Acteur$Acteur),
  function(actor) {
    flags <- Acteur_merge$`Emoji Pays`[which(Acteur_merge$Acteur == actor)]
    paste(unique(flags[!is.na(flags)]), collapse = "")
  },
  character(1),
  USE.NAMES = FALSE
)

Acteur <- select(Acteur, Rank, Acteur, Countries, Freq, Notes, Presse, Total)

# Scrollable HTML table of the first 50 actors. The colour tile on `Total`
# is computed over every actor before the table is truncated.
Acteur %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(n = 50) %>%
  kable(
    escape = FALSE,
    align = c("l", "l", rep("c", 5))
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(7, bold = TRUE, color = white) %>%
  scroll_box(height = "400px", width = "100%")

Rank	Acteur	Countries	Freq	Notes	Presse	Total
1	Hugh Jackman	🇺🇸	14	3.8	3.5	7.3
2	Robert Downey Jr.	🇺🇸	13	4.3	3.9	8.2
3	Chris Evans	🇺🇸🇰🇷	12	4.1	3.6	7.7
4	Brad Pitt	🇺🇸🇫🇷	12	3.9	3.5	7.4
5	Johnny Depp	🇺🇸	11	4.2	3.8	8.0
6	Tom Hanks	🇺🇸	11	4.1	3.9	8.0
7	Daniel Radcliffe	🇳🇿🇺🇸	11	4.1	3.8	7.9
8	Robert De Niro	🇺🇸	11	3.7	3.6	7.3
9	Ryan Reynolds	🇺🇸	11	3.6	3.5	7.1
10	Seth Rogen	🇺🇸🇬🇧	11	3.3	3.0	6.3
11	Emma Watson	🇺🇸	10	4.2	3.9	8.1
12	Chris Hemsworth	🇺🇸	10	4.2	3.6	7.8
13	Ben Stiller	🇺🇸	10	3.8	3.3	7.1
14	Leonardo DiCaprio	🇺🇸	9	4.6	4.1	8.7
15	Rupert Grint	🇺🇸	9	4.3	3.9	8.2
16	Scarlett Johansson	🇺🇸	9	3.7	3.7	7.4
17	Zac Efron	🇺🇸	9	3.3	3.1	6.4
18	Natalie Portman	🇺🇸🇫🇷	8	4.4	3.8	8.2
19	Daniel Craig	🇺🇸🇬🇧	8	4.5	3.6	8.1
20	Chris Pratt	🇺🇸	8	4.3	3.7	8.0
21	Angelina Jolie	🇺🇸	8	4.1	3.8	7.9
22	Robin Williams	🇺🇸	8	4.1	3.6	7.7
23	Jake Gyllenhaal	🇺🇸🇨🇦	8	4.0	3.5	7.5
24	Bradley Cooper	🇺🇸	8	3.7	3.6	7.3
25	Marion Cotillard	🇫🇷🇺🇸	8	3.8	3.5	7.3
26	Joseph Gordon-Levitt	🇺🇸	8	3.7	3.4	7.1
27	Will Smith	🇺🇸	8	3.6	3.3	6.9
28	Michaël Youn	🇫🇷	8	2.9	2.2	5.1
29	Ramzy Bedia	🇫🇷	8	2.2	1.9	4.1
30	Jennifer Lawrence	🇺🇸	7	4.0	3.7	7.7
31	Emma Stone	🇺🇸	7	3.7	3.8	7.5
32	Jack Black	🇺🇸	7	3.9	3.6	7.5
33	Anne Hathaway	🇺🇸	7	3.7	3.6	7.3
34	Robert Pattinson	🇺🇸	7	3.9	3.4	7.3
35	John Leguizamo	🇺🇸	7	3.3	3.6	6.9
36	Owen Wilson	🇺🇸	7	3.6	3.3	6.9
37	Kevin Hart	🇺🇸	7	3.3	3.4	6.7
38	Dwayne Johnson	🇺🇸	7	3.2	3.4	6.6
39	Jean Dujardin	🇫🇷	7	3.5	3.0	6.5
40	Kristen Stewart	🇨🇱🇺🇸	7	3.3	2.9	6.2
41	Anna Faris	🇺🇸	7	2.9	2.4	5.3
42	Ewan McGregor	🇺🇸	6	4.5	3.8	8.3
43	Kirsten Dunst	🇺🇸	6	4.4	3.8	8.2
44	Samuel L. Jackson	🇺🇸	6	4.3	3.9	8.2
45	Christian Bale	🇺🇸	6	4.1	4.0	8.1
46	Woody Harrelson	🇺🇸	6	4.2	3.9	8.1
47	Bruce Willis	🇺🇸	6	4.2	3.8	8.0
48	Chris Pine	🇬🇧🇺🇸	6	4.1	3.7	7.8
49	Tom Holland	🇺🇸	6	4.0	3.8	7.8
50	Adam Driver	🇺🇸	6	4.0	3.7	7.7

# Number of films per (actor, grade) for the first ten rows of `Acteur`,
# keeping only combinations that actually occur.
AListed_Actor <- select(Acteur_merge, Acteur, Grade)
AListed_Actor <- as.data.frame(table(AListed_Actor))

top_actors <- as.character(head(Acteur, 10)$Acteur)
AListed_Actor <- AListed_Actor[as.character(AListed_Actor$Acteur) %in% top_actors, ]
AListed_Actor <- AListed_Actor[AListed_Actor$Freq > 0, ]

# Total films per actor, repeated on each of the actor's rows (vectorised
# group sum), then sort actors by that total.
AListed_Actor$Total <- ave(
  AListed_Actor$Freq,
  as.character(AListed_Actor$Acteur),
  FUN = sum
)
AListed_Actor <- AListed_Actor[order(-AListed_Actor$Total), ]

# Turn the grade factor into a numeric score as |level index - 6|.
# NOTE(review): this presumably maps five levels A..E to 5..1; it relies on
# every grade level being present in `Acteur_merge$Grade` — confirm.
AListed_Actor$Grade <- abs(as.numeric(AListed_Actor$Grade) - 6)


# For each selected actor: a faint bar at the mean of the grade scores
# present, and one point per grade score sized by the number of films.
AListed_graph <- ggplot(AListed_Actor, aes(x = Acteur, y = Grade)) +
  geom_bar(stat = "summary", fun = "mean", fill = grey, alpha = 0.2) +
  # Size is mapped inside aes() so the size scale below (and its legend)
  # actually applies. A vector passed outside aes() bypasses the scale.
  geom_point(aes(size = Freq), color = blue) +
  scale_size_continuous(range = c(0.5, 10)) +
  scale_x_discrete(
    guide = guide_axis(n.dodge = 1),
    limits = as.character(unique(AListed_Actor$Acteur))
  ) +
  labs(
    title = "Actor with the best average according to their frequency",
    subtitle = "",
    y = "Grade", x = "Actor"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  ) +
  coord_flip()
AListed_graph

Acteur <- Acteur %>% arrange(-Freq)

# The 50 actors with the most films.
Actor_list <- head(Acteur$Acteur, 50) %>% as.character()

# Every billed pair of a film as (from, to): (actor 1, actor 2),
# (actor 1, actor 3) and (actor 2, actor 3).
cast_from <- c(
  as.character(CritiqueFilm$`Acteur 1`),
  as.character(CritiqueFilm$`Acteur 1`),
  as.character(CritiqueFilm$`Acteur 2`)
)
cast_to <- c(
  as.character(CritiqueFilm$`Acteur 2`),
  as.character(CritiqueFilm$`Acteur 3`),
  as.character(CritiqueFilm$`Acteur 3`)
)

# Co-occurrence counts restricted to the selected actors: cell [row, column]
# is the number of films where the column actor is billed before the row
# actor. Pairs involving a missing or unlisted actor are dropped by table().
pair_counts <- table(
  factor(cast_to, levels = Actor_list),
  factor(cast_from, levels = Actor_list)
)
Actor_matrix <- matrix(
  as.integer(pair_counts),
  nrow = length(Actor_list),
  ncol = length(Actor_list),
  dimnames = list(Actor_list, Actor_list)
)

# Data frame with the row actor in a leading `from` column.
Actor_matrix <- Actor_matrix %>% as.data.frame()
Actor_matrix$from <- rownames(Actor_matrix) %>% as.character()
Actor_matrix <- Actor_matrix[, c("from", Actor_list)]

# Transform the adjacency matrix into a long edge list (from, to, value).
# Column names are the actor names exactly as stored (as.data.frame() on a
# matrix does not mangle them), so they are used verbatim. Replacing "."
# with a space would turn "Robert Downey Jr." or "Samuel L. Jackson" into
# names that no longer match the `from` column.
connect <- Actor_matrix %>%
  as.data.frame() %>%
  gather(key = "to", value = "value", -1) %>%
  na.omit()

# Keep only pairs that share at least one film.
connect <- connect[connect$value > 0, ]

# Number of edges each actor takes part in (as either endpoint); the result
# has columns `name` and `n`, sorted by name.
coauth <- tibble(name = c(as.character(connect$from), as.character(connect$to))) %>%
  count(name)
#dim(coauth)

# Create a graph object with igraph
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)

# Find communities with the walktrap algorithm (cluster_walktrap() is the
# current name of walktrap.community()).
com <- cluster_walktrap(mygraph)
#max(com$membership)

# Attach the community to each actor and order actors by community. Vertex
# order in `mygraph` follows the rows of `coauth`, so memberships line up.
# The factor levels freeze this order.
coauth <- coauth %>%
  mutate(grp = com$membership) %>%
  arrange(grp) %>%
  mutate(name = factor(name, name))

# One colour per community, interpolated along the report palette.
colfunc <- colorRampPalette(c(purple, pink, blue, yellow))

scale_col <- colfunc(max(coauth$grp))

# Keep only edges whose two endpoints are in the vertex table
connect <- connect %>%
  filter(from %in% coauth$name) %>%
  filter(to %in% coauth$name)

# Rebuild the graph so that vertices follow the community ordering
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)

# Arc diagram: actors on a line (in vertex order), arcs for shared films,
# node size from the edge count `n` and node colour from the community.
ggraph(mygraph, layout = "linear") +
  geom_edge_arc(fold = TRUE, edge_colour = grey) +
  geom_node_point(aes(fill = grp, color = as.factor(grp), size = n)) +
  geom_node_text(
    aes(label = name),
    size = 3,
    angle = 65,
    hjust = 1,
    nudge_y = -0.5
  ) +
  scale_size_continuous(range = c(0.5, 5)) +
  scale_color_manual(values = scale_col) +
  expand_limits(y = c(-5, 0), x = c(-1.2, 1.2)) +
  theme(
    text = element_text(size = 12),
    legend.position = "none",
    panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

International

# Emit an inline stylesheet that gives leaflet map containers a white background.
cat(
  "",
  "<style>",
  ".leaflet-container {",
  "   background: #FFF;",
  "}",
  "</style>",
  "",
  sep = "\n"
)

# World map from Natural Earth, enriched per country with the number of films
# and the best-ranked film (title and cumulative grade).
map <- ne_countries()

# Export the country names used for matching (side effect: writes to the
# working directory).
write.csv(map$sovereignt, "map_Countries.csv")

n_countries <- nrow(map)
country_freq <- numeric(n_countries)
# Start from NA so countries without any film keep a missing value. The type
# is taken from whatever is assigned below.
country_best_movie <- rep(NA, n_countries)
country_best_rate <- rep(NA, n_countries)

for (s in seq_len(n_countries)) {
  # Films whose country of origin is this sovereign state (NA origins ignored).
  films <- which(CritiqueFilm$`Pays d'origine` == map$sovereignt[s])
  country_freq[s] <- length(films)

  # Best film = smallest `Nombre Classement` among this country's films.
  # The lookup stays within the country and always yields exactly one film;
  # on ties the first one is kept.
  ranked <- films[!is.na(CritiqueFilm$`Nombre Classement`[films])]
  if (length(ranked) > 0) {
    best <- ranked[which.min(CritiqueFilm$`Nombre Classement`[ranked])]
    country_best_movie[s] <- CritiqueFilm$`Titre du film`[best]
    country_best_rate[s] <- CritiqueFilm$`Notes cummulées`[best]
  }
}

map$freq <- country_freq
map$best_movie <- country_best_movie
map$best_movie_rate <- country_best_rate

# Countries without films are flagged as missing rather than zero.
map$freq[map$freq == 0] <- NA

# Bucket the film count into an ordinal grade:
# 0 = none, 1 = exactly one, 2 = under 5, 3 = under 100, 4 = under 500, 5 = the rest.
map$Grade <- case_when(
  is.na(map$freq) ~ 0,
  map$freq == 1 ~ 1,
  map$freq < 5 ~ 2,
  map$freq < 100 ~ 3,
  map$freq < 500 ~ 4,
  TRUE ~ 5
)

# Text label for each grade, in grade order 0..5.
map$Label <- c("0", "1", "<5", "<100", "<500", ">500")[map$Grade + 1]

# One colour bin per grade. colorBin() uses left-closed intervals with a
# closed last bin, so the breaks run one step past the maximum grade.
# Stopping at the maximum would put the two highest grades in the same bin.
pal <- colorBin(
  palette = mycols5, domain = map$Grade,
  bins = seq(0, max(map$Grade, na.rm = TRUE) + 1, by = 1)
)

# HTML hover label for each country.
map$labels <- paste0(
  "<strong> Country: </strong> ", map$sovereignt, "<br/> ",
  "<strong> Number of movies seen : </strong> ", round(map$freq, 0), "<br/> ",
  "<strong> Best movie for this country : </strong> ", map$best_movie, " : ", map$best_movie_rate, "/10", "<br/> "
) %>%
  lapply(htmltools::HTML)

# Interactive choropleth: countries filled by grade, hover label with the
# country summary, pink outline on hover, and a legend for the grade bins.
LeafMap <- leaflet(map) %>%
  setMapWidgetStyle(list(background = "white")) %>%
  setView(lng = 0, lat = 30, zoom = 1.3) %>%
  addPolygons(
    fillColor = ~ pal(Grade),
    color = purple,
    weight = 1,
    opacity = 1,
    fillOpacity = 1,
    label = ~labels,
    # Full argument name (no reliance on partial matching) and a logical `fill`.
    highlightOptions = highlightOptions(
      color = pink,
      bringToFront = TRUE,
      fill = TRUE, fillOpacity = 1
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~Grade, #c("0","1","<5","<100","<500",">500"),
    opacity = 1,
    title = "Freq"
  )
LeafMap

Sagas

# Number of films per (saga, grade).
AListed_Saga <- select(CritiqueFilm, Saga, Grade)
AListed_Saga <- as.data.frame(table(AListed_Saga))

# Sagas by decreasing number of films; the value "Saga" is excluded
# (presumably a placeholder for films without a saga — confirm).
Table_saga <- as.data.frame(table(CritiqueFilm$Saga))
Table_saga <- Table_saga[order(-Table_saga$Freq), ]
Table_saga <- Table_saga[Table_saga$Var1 != "Saga", ]

# Keep the ten largest sagas and only grade combinations that occur.
AListed_Saga <- AListed_Saga[AListed_Saga$Saga %in% as.character(head(Table_saga, 10)$Var1), ]
AListed_Saga <- AListed_Saga[AListed_Saga$Freq > 0, ]

# Total films per saga, repeated on each of the saga's rows (vectorised
# group sum), then sort by that total.
AListed_Saga$Total <- ave(
  AListed_Saga$Freq,
  as.character(AListed_Saga$Saga),
  FUN = sum
)
AListed_Saga <- AListed_Saga[order(-AListed_Saga$Total), ]

# Turn the grade factor into a numeric score as |level index - 6|.
# NOTE(review): this presumably maps five levels A..E to 5..1; it relies on
# every grade level being present in `CritiqueFilm$Grade` — confirm.
AListed_Saga$Grade <- abs(as.numeric(AListed_Saga$Grade) - 6)


# For each selected saga: a faint bar at the mean of the grade scores
# present, and one point per grade score sized by the number of films.
AListed_graph <- ggplot(AListed_Saga, aes(x = Saga, y = Grade)) +
  geom_bar(stat = "summary", fun = "mean", fill = grey, alpha = 0.2) +
  # Size is mapped inside aes() and scaled with a continuous scale: `Freq` is
  # numeric, and a vector passed outside aes() would bypass any size scale.
  geom_point(aes(size = Freq), color = blue) +
  scale_x_discrete(
    guide = guide_axis(n.dodge = 2),
    limits = as.character(unique(AListed_Saga$Saga))
  ) +
  ylim(0, 5) +
  scale_size_continuous(range = c(0.5, 20)) +
  labs(
    title = "Saga with the best average according to their frequency",
    subtitle = "",
    y = "Grade", x = "Saga"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
AListed_graph

# Year window used by the saga charts.
DecadeMin_graph <- 1930
DecadeMax_graph <- 2020

# Films released inside the window; which() drops films with a missing year
# instead of turning them into all-NA rows.
Decade_Grade_Saga <- select(CritiqueFilm, Saga, Année, `Nos notes`)
Decade_Grade_Saga <- Decade_Grade_Saga[
  which(Decade_Grade_Saga$Année >= DecadeMin_graph & Decade_Grade_Saga$Année <= DecadeMax_graph),
]

# Sagas by decreasing number of films in the window, excluding the value "Saga".
Decade_Grade_Saga_table <- as.data.frame(table(Decade_Grade_Saga$Saga))
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[order(-Decade_Grade_Saga_table$Freq), ]
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[Decade_Grade_Saga_table$Var1 != "Saga", ]
colnames(Decade_Grade_Saga_table)[1] <- "Saga"

# Per-saga statistics: mean personal grade, mean press grade, mean of the
# `Différence` column, and total running time in hours.
# NOTE(review): these are computed over all of `CritiqueFilm`, not only the
# films inside the year window used for `Freq` — confirm this is intended.
saga_rows <- lapply(as.character(Decade_Grade_Saga_table$Saga), function(saga) {
  which(CritiqueFilm$Saga == saga)
})
Decade_Grade_Saga_table$Grade <- vapply(saga_rows, function(rows) {
  round(mean(CritiqueFilm$`Nos notes`[rows], na.rm = TRUE), 1)
}, numeric(1))
Decade_Grade_Saga_table$Presse <- vapply(saga_rows, function(rows) {
  round(mean(CritiqueFilm$`Note Presse`[rows], na.rm = TRUE), 1)
}, numeric(1))
Decade_Grade_Saga_table$Diff <- vapply(saga_rows, function(rows) {
  round(mean(CritiqueFilm$Différence[rows], na.rm = TRUE), 1)
}, numeric(1))
Decade_Grade_Saga_table$Duration <- vapply(saga_rows, function(rows) {
  round(sum(CritiqueFilm$Durée[rows], na.rm = TRUE) / 60, 1)
}, numeric(1))

rownames(Decade_Grade_Saga_table) <- NULL

# Scrollable HTML table of every saga with its statistics.
Decade_Grade_Saga_table %>%
  knitr::kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  scroll_box(height = "370px", width = "100%")

Saga	Freq	Grade	Presse	Diff	Duration
Disney	70	3.9	3.8	0.4	123.1
Marvel	46	4.1	3.6	0.5	110.2
DreamWorks	31	3.8	3.6	0.5	47.3
DC	15	3.8	3.5	0.6	42.9
Ghibli	12	3.5	3.9	0.5	22.2
Star Wars	12	4.5	3.8	0.7	26.5
Harry Potter	10	4.3	3.8	0.5	28.0
American Pie	8	2.8	2.5	0.4	12.9
James Bond	6	4.3	3.6	0.8	15.8
Jurassic Park	5	4.3	3.3	0.9	12.4
L’Âge de glace	5	3.2	3.4	0.3	8.8
Pirates des Caraïbes	5	4.6	3.8	0.8	12.1
Pokémon	5	3.3	3.0	0.5	8.4
Saw	5	3.1	2.8	0.5	8.2
Twilight	5	3.6	3.1	0.5	10.2
Astérix	4	3.9	3.3	0.6	6.5
Hunger Games	4	4.2	3.6	0.5	9.1
La Planète des Singes	4	4.7	4.0	0.7	8.1
La Terre du Milieu	4	4.9	4.3	0.6	14.0
Scary Movie	4	2.7	2.2	0.5	5.7
Transformers	4	3.4	3.1	0.5	9.4
American Nightmare	3	3.4	3.1	0.4	4.9
Ducobu	3	2.6	2.0	0.6	4.7
Fast & Furious	3	2.4	2.8	0.5	5.8
Hellboy	3	3.3	2.8	0.6	6.0
Hôtel Transylvanie	3	3.6	3.6	0.1	6.2
John Wick	3	4.2	3.6	0.5	5.9
Jumanji	3	3.9	3.5	0.3	5.8
Klapisch	3	2.9	3.7	0.8	6.1
La Nuit au Musée	3	3.6	3.0	0.6	5.2
Les Schtroumpfs	3	3.3	2.9	0.4	5.0
Lucky Luke	3	2.8	1.6	1.2	4.6
Moi, Moche et Méchant	3	3.9	3.8	0.2	4.7
Mon beau-père et moi	3	3.2	3.1	0.2	5.3
Narnia	3	3.5	3.2	0.4	6.8
Retour vers le futur	3	4.6	4.3	0.4	5.7
Sherlock Holmes	3	4.3	3.9	0.7	6.3
Star Trek	3	4.7	3.8	0.9	6.4
Tarantino	3	4.7	4.1	0.5	7.8
Very Bad Trip	3	4.0	3.4	0.6	5.0
300	2	4.0	3.6	0.4	3.6
Babysitting	2	3.9	3.8	0.1	3.0
Borat	2	2.8	2.7	0.1	2.9
Comme des bêtes	2	3.0	3.6	0.6	2.9
Comment tuer son boss?	2	3.2	2.9	0.4	3.4
Destination finale	2	3.0	2.7	0.3	2.9
Dr. Seuss	2	3.4	3.3	0.7	3.2
Happy Feet	2	4.3	3.2	1.2	3.5
Jump Street	2	3.5	3.5	0.0	3.7
Kingsman	2	4.8	4.0	0.8	6.7
Le Choc des Titans	2	4.2	2.4	1.8	3.4
Les Nouvelles Aventures	2	1.3	1.5	0.2	3.4
Matrix	2	4.2	4.0	0.3	4.6
Nos pires voisins	2	3.4	2.6	0.8	3.1
OSS 117	2	3.8	3.3	0.5	5.2
Papa ou Maman	2	3.6	3.5	0.2	3.0
Percy Jackson	2	3.3	2.7	0.6	3.8
Red	2	3.9	3.5	0.4	3.8
Rio	2	3.9	3.8	0.4	3.2
Sister Act	2	3.2	3.5	0.2	3.5
Ted	2	3.8	3.3	0.5	3.7
Zombieland	2	3.9	3.9	0.2	3.1
Agatha Christie	1	4.2	3.3	0.9	4.1
Alien	1	4.0	3.3	0.7	2.1
Asimov	1	3.7	3.7	0.0	1.7
Assassin’s Creed	1	3.6	2.9	0.7	1.9
Blade Runner	1	2.8	4.2	1.4	2.0
Breaking Bad	1	4.2	3.7	0.5	2.0
Dernier train pour Busan	1	3.9	4.1	0.2	2.0
Dragon Quest	1	4.1	3.8	0.3	1.7
Ghost in the Shell	1	3.2	3.3	0.1	1.8
Happy Birthdead	1	3.7	3.3	0.4	1.6
Hawking	1	4.2	4.3	0.1	2.0
His Dark Materials	1	4.0	2.8	1.2	1.9
Jackass	1	1.0	2.2	1.2	1.4
Kick-Ass	1	5.0	4.0	1.0	2.0
La Tour Montparnasse	1	3.4	2.4	1.0	1.5
Le Labyrinthe	1	3.8	3.8	0.0	1.9
Le Petit Nicolas	1	2.4	2.4	0.0	1.6
Lego	1	3.8	3.0	0.8	1.2
LEGO	1	3.6	4.0	0.4	1.7
Les Visiteurs	1	1.0	1.5	0.5	1.8
Limitless	1	5.0	3.9	1.1	1.8
Mad Max	1	4.6	4.2	0.4	2.0
Matt Groening	1	4.4	3.8	0.6	1.4
Men in Black	1	3.4	2.5	0.9	1.9
MonsterVerse	1	4.2	3.8	0.4	3.1
Mythologie	1	4.2	3.6	0.6	2.7
Ocean	1	3.0	3.0	0.0	1.8
Orelsan	1	3.2	3.4	0.2	1.5
Prince of Persia	1	4.4	3.3	1.1	1.9
Sans un bruit	1	4.3	3.8	0.6	3.1
Seuls	1	2.4	2.3	0.1	1.6
Sonic	1	3.4	3.3	0.1	1.6
Tintin	1	4.0	3.6	0.4	1.8
Titeuf	1	3.8	3.1	0.7	2.7
Transperceneige	1	4.6	3.5	1.1	2.1

# Restrict the film list to the ten sagas at the top of the saga table.
Decade_Grade_Saga_table_top <- head(Decade_Grade_Saga_table, 10)
Decade_Grade_Saga <- Decade_Grade_Saga[as.character(Decade_Grade_Saga$Saga) %in% as.character(Decade_Grade_Saga_table_top$Saga), ]

# Heat map of film counts per saga (y) and ten-year bin of release year (x).
Year_Grade_graph <- ggplot(Decade_Grade_Saga, aes(Année, Saga)) +
  geom_bin2d(binwidth = c(10, 1)) +
  scale_x_continuous(breaks = seq(DecadeMin_graph, DecadeMax_graph, 10)) +
  scale_fill_gradient(low = purple, high = blue) +
  # Single labs() call; an earlier one would be entirely overridden by this.
  labs(
    title = "Count of films per Saga and Decades",
    subtitle = paste0("from ", DecadeMin_graph, " to ", DecadeMax_graph),
    y = "Saga", x = "Decade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Year_Grade_graph

# Keep the first 15 sagas of the table for the remaining saga charts.
Decade_Grade_Saga_table <- head(Decade_Grade_Saga_table, 15)

# Scatter plot of sagas: mean grade (x) against total running time in hours
# (y), coloured by whether the saga is above the mean grade of those shown.
Saga_graph <- ggplot(Decade_Grade_Saga_table, aes(x = Grade, y = Duration)) +
  geom_vline(
    xintercept = mean(Decade_Grade_Saga_table$Grade, na.rm = TRUE),
    col = grey
  ) +
  # Single annotation for the mean value instead of one stacked copy per row.
  annotate(
    "text",
    x = mean(Decade_Grade_Saga_table$Grade, na.rm = TRUE) - 0.1,
    y = max(Decade_Grade_Saga_table$Duration, na.rm = TRUE) + 1,
    label = round(mean(Decade_Grade_Saga_table$Grade, na.rm = TRUE), 2),
    colour = grey,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  geom_point(aes(colour = Grade > mean(Grade, na.rm = TRUE))) +
  xlim(min(Decade_Grade_Saga_table$Grade, na.rm = TRUE), 5.2) +
  geom_text(
    aes(label = paste(Saga, Grade), colour = Grade > mean(Grade, na.rm = TRUE)),
    hjust = -0.1,
    vjust = -0.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt,
    check_overlap = TRUE
  ) +
  scale_color_manual(values = mycols2) +
  # The title describes this chart (sagas, grade and duration).
  labs(
    title = "Sagas by average grade and cumulative duration",
    subtitle = "",
    y = "Duration", x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Saga_graph

# Bucket running times into 2-minute steps.
# Note: this overwrites `CritiqueFilm$Durée` for everything that runs afterwards.
CritiqueFilm$Durée <- round(CritiqueFilm$Durée / 2, 0) * 2

# Blank out unusually long films: anything above the upper boxplot whisker.
# The whisker is used rather than the smallest reported outlier, because the
# outlier list also contains unusually short films and its minimum would then
# blank out almost every duration.
duration_upper_whisker <- boxplot.stats(CritiqueFilm$Durée)$stats[5]
CritiqueFilm$Durée[which(CritiqueFilm$Durée > duration_upper_whisker)] <- NA

# Mean personal grade per duration bucket, with a smoothed trend.
Duration_graph <- ggplot(CritiqueFilm, aes(x = Durée, y = `Nos notes`)) +
  geom_bar(stat = "summary", fun = "mean", fill = purple) +
  geom_smooth(color = pink) +
  labs(
    title = "Average rating per film duration",
    y = "Grade", x = "Duration (min)"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Duration_graph

# Sort sagas by increasing `Diff` so the bars appear in that order.
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[order(Decade_Grade_Saga_table$Diff), ]

# Horizontal bars of `Diff` per saga with a reference line at zero.
AListed_Saga_Table_graph <- ggplot(
  Decade_Grade_Saga_table,
  aes(x = Diff, y = Saga)
) +
  geom_bar(stat = "summary", fun = "sum", fill = purple) +
  geom_vline(xintercept = 0, col = blue) +
  labs(
    title = "Difference between our Grades and Press' Grades",
    x = "Difference",
    y = "Saga"
  ) +
  scale_y_discrete(
    limits = as.character(unique(Decade_Grade_Saga_table$Saga)),
    guide = guide_axis(n.dodge = 1)
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "right"
  )

AListed_Saga_Table_graph

Consommation

# English weekday names in display order, Sunday first.
Weekdays_order <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

# Weekday of each film's last viewing. The name is looked up from the numeric
# weekday (0 = Sunday) so it always matches `Weekdays_order`, whereas
# weekdays() returns names in the session locale.
Weekdays_table <- data.frame(
  Days = Weekdays_order[as.POSIXlt(as.Date(CritiqueFilm$`Dernier visionnage`))$wday + 1]
)

# Films without a viewing date are left out.
Weekdays_table <- Weekdays_table %>% filter(!is.na(Days))

# Bar chart of the number of viewings per weekday.
Weekdays_graph <- ggplot(Weekdays_table, aes(x = Days)) +
  geom_bar(stat = "count", fill = purple) +
  scale_x_discrete(limits = Weekdays_order) +
  labs(
    title = "Count of films per day of the week",
    y = "Count", x = "Days"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Weekdays_graph

# Share of films per calendar month, in percent with one decimal.
# NOTE(review): the month comes from `Date de sortie` (release date) while
# the title says "seen per month" — confirm which column is intended.
Month_table <- as.numeric(format(as.Date(CritiqueFilm$`Date de sortie`, format = "%Y-%m-%d"), "%m"))
Month_table <- as.data.frame(table(Month_table))
Month_table$Freq <- round(Month_table$Freq * 100 / sum(Month_table$Freq), 1)

# Lollipop chart with a reference line at the uniform share (100 / 12).
Month_graph <- ggplot(Month_table, aes(x = Month_table, y = Freq)) +
  geom_hline(yintercept = 100 / 12, col = grey) +
  geom_bar(stat = "identity", fill = grey, width = 0.01) +
  geom_point(size = 2, color = blue) +
  geom_text(
    aes(label = paste0(Freq, "%")),
    size = 3,
    hjust = 0.5,
    vjust = -1,
    family = "AvertaPE-Regular",
    check_overlap = TRUE
  ) +
  # The x variable is a factor with levels "1".."12", so the limits are given
  # as character values. `labels` is spelled out in full.
  scale_x_discrete(limits = as.character(1:12), labels = month.abb[1:12]) +
  labs(
    title = "Percentage of films seen per month",
    y = "Percent", x = "Month"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Month_graph

# library(ggraph)
# library(igraph)
# library(tidyverse)
# library(viridis)
# 
# 
# Distribution <- select(CritiqueFilm,`Maison mère`,`Maison de distribution`)
# Distribution2 <- Distribution %>% count(`Maison de distribution`)
# 
# for (d in 1:nrow(Distribution2)){
#   Distribution2$`Maison mère`[d] <- Distribution$`Maison mère`[Distribution2$`Maison de distribution`[d]==Distribution$`Maison de distribution`]
# }
# Distribution2
# 
# Distribution2 <- Distribution2[Distribution2$n>=3,]
# Distribution2 <- Distribution2[Distribution2$`Maison mère`!="Autre",]
# 
# Distribution2$`Maison mère` <- str_replace_all(Distribution2$`Maison mère`," ","")
# Distribution2$`Maison de distribution` <- str_replace_all(Distribution2$`Maison de distribution`," ","")
# 
# Distribution2$name <- paste0("Distribution.",Distribution2$`Maison mère`,".",Distribution2$`Maison de distribution`)
# Distribution2$from <- paste0("Distribution.",Distribution2$`Maison mère`)
# 
# vertices <- Distribution2 %>% select(name,n,`Maison de distribution`)
# colnames(vertices) <- c("name","size","shortName")
# 
# edges <- Distribution2 %>% select(from,name)
# colnames(edges) <- c("from","to")
# 
# d1 <- data.frame(from="origin", to=paste("group", seq(1,10), sep=""))
# d2 <- data.frame(from=rep(d1$to, each=10), to=paste("subgroup", seq(1,100), sep="_"))
# hierarchy <- rbind(d1, d2)
# 
# vertices <- data.frame(name = unique(c(as.character(hierarchy$from), as.character(hierarchy$to))) ) 
# 
# vertices$id <- NA
# myleaves <- which(is.na( match(vertices$name, edges$from) ))
# nleaves <- length(myleaves)
# vertices$id[ myleaves ] <- seq(1:nleaves)
# vertices$angle <- 90 - 360 * vertices$id / nleaves
# vertices$hjust <- ifelse( vertices$angle < -90, 1, 0)
# vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
# 
# mygraph <- graph_from_data_frame( hierarchy, vertices=vertices )
# 
# ggraph(mygraph, layout = 'dendrogram', circular = TRUE) + 
#   geom_node_point(aes(filter = leaf, x = x*1.05, y=y*1.05)) +
#   geom_conn_bundle(data = get_con(from = from, to = to), alpha=0.2, colour="skyblue", width=0.9) +
#   geom_node_text(aes(x = x*1.1, y=y*1.1, filter = leaf, label=name, angle = angle, hjust=hjust), size=1.5, alpha=1) +
#   theme_void() +
#   theme(
#     legend.position="none",
#     plot.margin=unit(c(0,0,0,0),"cm"),
#   ) +
#   expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))

Conclusion

# Most recently watched films ----
# Builds a table of the `Top` most recently watched films and renders it as a
# styled HTML table, with the grade shown as a colour tile.
Top <- 10

# Keep only the films that have a recorded viewing date, and only the columns
# that are displayed.
Lastfilms <- CritiqueFilm[!is.na(CritiqueFilm$`Dernier visionnage`), ]
Lastfilms <- select(
  Lastfilms,
  `English Title`, `Date de sortie`, `Pays d'origine`,
  Réalisateur, `Notes cummulées`, `Dernier visionnage`
)
colnames(Lastfilms) <- c("Title", "Date", "Country", "Director", "Grade", "Last Visio")
Lastfilms$`Last Visio` <- as.Date(as.POSIXct(Lastfilms$`Last Visio`))
# NOTE(review): how the second argument of as.Date() is interpreted depends on
# the class of `Date` (text vs. date-time) - confirm the type read from Excel.
Lastfilms$Date <- format(as.Date(Lastfilms$Date, "%m/%d/%y"), "%b %Y")

# Newest viewing first (ties keep their original order), then keep `Top` rows.
Lastfilms <- Lastfilms %>%
  arrange(desc(`Last Visio`)) %>%
  head(Top)
Lastfilms$Country[Lastfilms$Country == "United States of America"] <- "USA"

# One alignment per displayed column (six columns in total).
Lastfilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("l", "c", "c", "l", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(5, bold = TRUE, color = white)

Title	Date	Country	Director	Grade	Last Visio
X-Men: Days of Future Past	May 2014	USA	Bryan Singer	9.0	2022-10-21
Mulan 2 : La Mission de l’Empereur	Feb 2005	USA	Lynne Southerland & Darrell Rooney	5.1	2022-10-20
Go West: A Lucky Luke Adventure	Dec 2007	France	Olivier Jean-Marie	6.2	2022-10-14
The Wolverine	Jul 2013	USA	James Mangold	7.1	2022-10-09
Chicken Little	Dec 2005	USA	Mark Dindal	5.0	2022-10-09
X-Men: First Class	May 2011	USA	Matthew Vaughn	8.7	2022-10-08
Beauty and the Beast	Oct 1992	USA	Gary Trousdale et Kirk Wise	8.1	2022-10-08
X-Men Origins: Wolverine	Apr 2009	USA	Gavin Hood	6.4	2022-10-07
Super	Apr 2011	USA	James Gunn	7.0	2022-10-04
Le crocodile du Botswanga	Feb 2014	France	Lionel Steketee et Fabrice Éboué	5.2	2022-10-04

# Films per decade, split by the `DA` flag ----
DA <- select(CritiqueFilm, `Date de sortie`, DA)

# Reduce each release date to the decade it belongs to. floor() is used so that
# every year of a decade lands in the same bin (round() sends 1985 to 1980 but
# 1995 to 2000 because R rounds halves to the even digit).
DA$`Date de sortie` <- as.numeric(format(as.Date(DA$`Date de sortie`, format = "%Y-%m-%d"), "%Y"))
DA$`Date de sortie` <- floor(DA$`Date de sortie` / 10) * 10

# Cross-tabulate decade x DA flag; the two flag values become two columns.
# NOTE(review): this assumes `DA` has exactly two distinct values - confirm.
DA <- as.data.frame.matrix(table(DA))
colnames(DA) <- c("Total", "Anime")
# Numeric decades give geom_area() a continuous x axis; with character values
# every decade would be its own single-point group.
DA$Decades <- as.numeric(rownames(DA))
DA$Total <- DA$Total + DA$Anime

# NOTE(review): the legend labels below ("Movies to see" / "Movies seen") do not
# obviously describe the Total / Anime series - confirm the intended wording.
DA_graph <- ggplot(DA) +
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Anime, fill = "Movies seen")) +
  # geom_label() has no `check_overlap` argument, so it is not passed here.
  geom_label(aes(x = Decades, y = Anime, label = Anime),
             fill = purple,
             colour = white,
             hjust = 0.5,
             vjust = -2,
             family = "AvertaPE-Regular") +
  scale_fill_manual(values = c(purple, blue)) +
  labs(y = "Number of films", x = "Decade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        legend.title = element_blank(),
        legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = blue))

DA_graph

library(rvest)
library(stringr)

# Blu-ray price cache ----
# Prices are scraped from amazon.fr at most once a week and cached in
# "ToBuy.Rda". The cache is rebuilt when it is missing or older than 7 days.
cache_file <- "ToBuy.Rda"
cache_is_stale <- TRUE
if (file.exists(cache_file)) {
  load(cache_file)
  cache_is_stale <- max(ToBuy$Date) < (Sys.Date() %m-% days(7))
}

if (cache_is_stale) {
  titles <- CritiqueFilm$`Titre du film`[CritiqueFilm$`A acheter` == "A acheter"]
  titles <- titles[!is.na(titles)]

  # Keep the second " - "-separated piece of each title; titles without a
  # second piece are dropped instead of breaking the whole refresh.
  film_names <- vapply(strsplit(titles, " - "), function(parts) parts[2], character(1))
  film_names <- film_names[!is.na(film_names)]

  ToBuy <- data.frame(ToBuy = film_names, stringsAsFactors = FALSE)
  # reserved = TRUE also escapes characters such as "&", "+" or "?" that would
  # otherwise change the meaning of the query string.
  ToBuy$Link <- paste0("https://www.amazon.fr/s?k=",
                       URLencode(ToBuy$ToBuy, reserved = TRUE),
                       "+blu-ray")

  # Pre-allocate the scraped columns; entries stay NA when a page cannot be
  # fetched or the expected node is absent.
  ToBuy$Price <- rep(NA_character_, nrow(ToBuy))
  ToBuy$Name <- rep(NA_character_, nrow(ToBuy))

  for (b in seq_along(ToBuy$Link)) {
    url <- ToBuy$Link[b]
    website <- tryCatch(
      read_html(url),
      error = function(e) {
        warning("Could not fetch ", url, ": ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
    if (is.null(website)) {
      next
    }
    # Only the first match of each CSS selector is kept.
    ToBuy$Price[b] <- html_text(html_nodes(website, ".a-price-whole"))[1]
    ToBuy$Name[b] <- html_text(html_nodes(website, ".s-line-clamp-4"))[1]
  }

  # Decimal comma -> decimal point before converting to numeric.
  ToBuy$Price <- as.numeric(str_replace(ToBuy$Price, ",", "."))
  ToBuy$Date <- Sys.Date()
  save(ToBuy, file = cache_file)
}

# Distribution of Blu-ray prices ----
load("ToBuy.Rda")

# Keep prices in [4, 30); rows with a missing price are dropped as well.
ToBuy <- ToBuy[order(ToBuy$Price), ]
ToBuy <- ToBuy[!is.na(ToBuy$Price) & ToBuy$Price >= 4 & ToBuy$Price < 30, ]

# Bin prices to the nearest multiple of 2 and count the films per bin.
ToBuy$Price_rounded <- round(ToBuy$Price / 2) * 2
Bluray <- as.data.frame(table(ToBuy$Price_rounded))
Bluray$Var1 <- as.numeric(as.character(Bluray$Var1))

# The y axis shows at least 0-15 but grows with the data, so that tall bars and
# their labels are not removed by a fixed limit.
bluray_y_max <- max(15, max(Bluray$Freq) * 1.2)

Bluray_graph <- ggplot(Bluray, aes(x = Var1, y = Freq)) +
  geom_col(fill = purple, width = 0.01) +
  geom_point(size = 3, color = blue) +
  geom_text(aes(label = Freq),
            size = 3,
            hjust = 0.5,
            vjust = -2,
            family = "AvertaPE-Regular",
            check_overlap = TRUE) +
  ylim(0, bluray_y_max) +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.line = element_line(colour = blue)) +
  labs(title = "Number of BluRay to buy",
       y = "Count", x = "Price")
Bluray_graph

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).

# Ten cheapest Blu-rays ----
# `ToBuy` is already sorted by ascending price. head() returns fewer rows when
# fewer than ten films are available instead of padding the table with NA rows,
# and the columns are selected by name rather than by position.
ToBuyTop <- head(ToBuy[, c("ToBuy", "Price")], 10)
ToBuyTop <- as.data.frame(ToBuyTop)
rownames(ToBuyTop) <- seq_len(nrow(ToBuyTop))

ToBuyTop %>%
  mutate(Price = color_tile(blue, purple)(Price)) %>%
  kable(escape = FALSE, align = c("l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(2, bold = TRUE, color = white)

ToBuy	Price
Dragons 2	6.00
Kung Fu Panda 3	6.00
L’Âge de Glace 1	6.19
Patients	6.20
Les Nouveaux Héros	7.37
Sully	7.70
The King’s Man : Première Mission	7.81
Les Douze Travaux d’Astérix	7.99
Imitation Game	8.24
Get Out	8.38

# Grade trend by release date, one smoothed curve per saga ----
Year_graph_DB <- CritiqueFilm %>%
  select(`Date de sortie`, `Notes cummulées`, Grade, Saga)
released_after_1985 <- Year_graph_DB$`Date de sortie` > as.Date("1985-01-01")
Year_graph_DB <- Year_graph_DB[released_after_1985, ]

# Only the first five sagas listed in Decade_Grade_Saga keep their own curve;
# every other film is pooled under "Trend".
leading_sagas <- head(unique(Decade_Grade_Saga$Saga), 5)
Year_graph_DB$Saga[!(Year_graph_DB$Saga %in% leading_sagas)] <- "Trend"

# Grades are halved before plotting, hence the 0-5 axis.
Year_graph <- ggplot(
  Year_graph_DB,
  aes(x = `Date de sortie`, y = `Notes cummulées` / 2)
) +
  geom_point(colour = "#F2F2F2") +
  ylim(0, 5) +
  scale_size_continuous(range = c(0.1, 0.5)) +
  geom_smooth(
    aes(group = Saga, col = Saga),
    method = "lm",
    formula = y ~ splines::bs(x, 4),
    se = FALSE
  ) +
  scale_color_manual(values = mypal(6)) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade",
    x = "Year"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Year_graph

# Embed the author's picture (an SVG file) in the rendered report.
# NOTE(review): this call does not set any size or float itself; presumably
# those come from the chunk options - confirm.
# NOTE(review): the path is absolute and machine-specific; knitr recommends
# relative paths for images, so consider moving the file next to the report.
knitr::include_graphics("/Users/theotimebourgeois/Desktop/Graphisme/Théotime/PhotoCV.svg")

## Warning in knitr::include_graphics("/Users/theotimebourgeois/Desktop/Graphisme/
## Théotime/PhotoCV.svg"): It is highly recommended to use relative paths for
## images. You had absolute paths: "/Users/theotimebourgeois/Desktop/Graphisme/
## Théotime/PhotoCV.svg"

Analysis conducted by Théotime Bourgeois

Master of Science - Data Science & Organizational Behavior

by Burgundy School of Business

# Scores of films that have an entry in the `Oscar` column ----
has_oscar_entry <- !is.na(NamesFilm$Oscar)
Oscar <- NamesFilm[has_oscar_entry, ]
in_year_range <- Oscar$Année >= YearMin_graph
Oscar <- Oscar[in_year_range, ]
# TRUE when the entry text contains the word "Oscar".
Oscar$OscarTF <- str_detect(Oscar$Oscar, "Oscar")

oscar_flagged <- filter(Oscar, OscarTF)

ggplot(Oscar, aes(x = Année, y = `Notes cummulées`)) +
  geom_count(colour = purple) +
  geom_point(data = oscar_flagged, colour = blue) +
  scale_size("Count", range = c(1, 6)) +
  #stat_summary(aes(y = `Notes cummulées`,group = 1), fun=mean, colour=yellow,geom="line")+
  geom_smooth(
    aes(group = OscarTF, col = OscarTF),
    method = "lm",
    formula = y ~ splines::bs(x, 2),
    se = FALSE
  ) +
  scale_color_manual("Winner", values = c(purple, blue)) +
  labs(
    title = "Evolution of the scores of the films presented at the Oscars",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade",
    x = "Year"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )

# Circle-packing chart of distributors grouped by parent company ----
# Number of films per distributor.
distributor_counts <- as.data.frame(table(NamesFilm$`Maison de distribution`))
names(distributor_counts) <- c("Maison de distribution", "Freq")

# Distinct distributor / parent-company pairs, joined on the shared
# distributor column.
parent_lookup <- unique(select(NamesFilm, `Maison de distribution`, `Maison mère`))
Distri_Circle <- merge(distributor_counts, parent_lookup)

# Keep distributors with a known parent (other than "France" / "Autre") and
# more than ten films.
Distri_Circle <- Distri_Circle %>%
  filter(!is.na(`Maison mère`)) %>%
  transmute(
    root = "root",
    group = `Maison mère`,
    subgroup = `Maison de distribution`,
    value = Freq
  ) %>%
  filter(group != "France", group != "Autre", value > 10)

# Show the film count next to each distributor name.
Distri_Circle$subgroup <- paste0(Distri_Circle$subgroup, " (", Distri_Circle$value, ")")

# as.Node() builds the hierarchy from the "/"-separated path.
Distri_Circle$pathString <- paste0("world/", Distri_Circle$group, "/", Distri_Circle$subgroup)
population <- as.Node(Distri_Circle)

# Make the plot
#circlepackeR(population, size = "value")

# Same plot with an explicit colour range, saved as a standalone HTML widget.
p <- circlepackeR(
  population,
  size = "value",
  color_min = "hsl(240, 31%, 25%)",
  color_max = "hsl(0, 0%, 0%)"
)
saveWidget(p, file = "circles.html")
# p

# Grade trend for the first `Top` actors listed in `Acteur` ----
Top <- 5

Top_Acteur <- as.character(head(Acteur$Acteur, Top))

Acteur_merge_Top <- filter(Acteur_merge, Acteur %in% Top_Acteur)

# Earliest release year among the selected actors' films (for the subtitle).
first_year_top_actors <- min(
  as.numeric(format(Acteur_merge_Top$`Date de sortie`, "%Y"))
)

Acteur_Top_graph <- ggplot(
  Acteur_merge_Top,
  aes(x = `Date de sortie`, y = `Nos notes`, col = Acteur)
) +
  geom_point() +
  geom_smooth(
    aes(group = Acteur),
    method = "lm",
    formula = y ~ splines::bs(x, 3),
    se = FALSE
  ) +
  scale_color_manual(values = mypal(Top)) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", first_year_top_actors, " to ", YearMax_graph),
    y = "Grade",
    x = "Year"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Acteur_Top_graph

# Seen vs. not-seen films per actor ----
# Stack the three actor columns into one long (Acteur, Seen) table.
actor_columns <- c("Acteur 1", "Acteur 2", "Acteur 3")
actor_seen_long <- lapply(actor_columns, function(actor_col) {
  setNames(NamesFilm[, c(actor_col, "Seen")], c("Acteur", "Seen"))
})

# Cross-tabulate actor x Seen, then keep the ten actors with the most unseen
# films (ties: fewest seen first) among those with at least one film seen.
Acteur_Proj <- do.call(rbind, actor_seen_long) %>%
  table() %>%
  as.data.frame.matrix() %>%
  arrange(desc(`FALSE`), `TRUE`) %>%
  filter(`TRUE` != 0) %>%
  head(10)

# Actor names live in the row names; turn them into a column, add the total and
# order the factor levels by total.
Acteur_Proj <- Acteur_Proj %>%
  mutate(
    Acteur = rownames(Acteur_Proj),
    Seen = `TRUE`,
    NotSeen = `FALSE`,
    Total = Seen + NotSeen
  ) %>%
  select(Acteur, Seen, NotSeen, Total) %>%
  arrange(desc(Total), desc(Seen)) %>%
  mutate(Acteur = fct_reorder(Acteur, Total))

rownames(Acteur_Proj) <- 1:nrow(Acteur_Proj)

# The thick segment and large point show the total; the thinner segment and
# smaller point drawn over them show the films already seen.
ggplot(Acteur_Proj, aes(y = Acteur)) +
  geom_segment(aes(x = 0, xend = Total, yend = Acteur, col = "Not Seen"), size = 9) +
  geom_segment(aes(x = 0, xend = Seen, yend = Acteur, col = "Seen"), size = 6) +
  geom_point(aes(x = Seen, col = "Seen"), size = 5) +
  geom_point(aes(x = Total, col = "Not Seen"), size = 8) +
  scale_color_manual(values = c(purple, pink)) +
  geom_text(aes(x = Seen, label = Seen), col = white, family = "AvertaPE-Black") +
  geom_text(aes(x = Total, label = Total), col = white, family = "AvertaPE-Black") +
  labs(
    title = "Title",
    subtitle = "Test",
    y = NULL,
    x = "Count",
    col = "Movies"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )

# Reload the ratings sheet and prepare the palette for the Top 100 poster ----
CritiqueFilm <- read_excel(
  "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",
  sheet = "Notation"
)

# Saga relabelling: "Batman" becomes "DC", and every film distributed by
# DreamWorks Animation is put in a "DreamWorks" saga.
is_batman <- CritiqueFilm$Saga == "Batman"
CritiqueFilm$Saga[is_batman] <- "DC"
is_dreamworks <- !is.na(CritiqueFilm$`Maison de distribution`) &
  CritiqueFilm$`Maison de distribution` == "DreamWorks Animation"
CritiqueFilm$Saga[is_dreamworks] <- "DreamWorks"

# Base colours of the poster; colpastel(n) interpolates n colours along them.
pastel <- c(
  "#9B553A",
  "#3F4A4D",
  "#728989",
  "#9FB9AC",
  #"#CEAF65",
  "#847359",
  "#6D836E",
  "#455C46",
  #"#F5DAA1",
  "#E59B97",
  "#9E6B66",
  "#513136"
)
colpastel <- colorRampPalette(pastel)

# Split the `Meilleure film/année` column on "-" into two columns.
CritiqueFilm <- CritiqueFilm %>%
  separate(
    col = `Meilleure film/année`,
    into = c("RangAnnée", "Année"),
    sep = "-"
  )

# Top 100 films and the world map of their countries of origin ----
# Best 100 films by rank, re-ordered chronologically; ID is the chronological
# position (1..100) used as the angular coordinate of the poster charts.
Top100 <- CritiqueFilm %>%
  arrange(Rang) %>%
  head(100) %>%
  arrange(`Date de sortie`) %>%
  mutate(ID = row_number())

# Country polygons from Natural Earth (the former map_data('world') call was
# immediately overwritten by this one and has been removed).
world <- ne_countries(scale = "medium", returnclass = "sf")

# Countries that produced a Top 100 film are filled by name; all the others get
# NA and therefore the scale's `na.value` colour.
world$color <- ifelse(
  world$sovereignt %in% Top100$`Pays d'origine`,
  world$sovereignt,
  NA
)

world <- world[world$sovereignt != "Antarctica", ]

# One palette colour per highlighted country (the NA category is excluded).
n_map_colours <- length(unique(world$color)) - 1

Map_graph <- ggplot(data = world) +
  geom_sf(aes(fill = color), color = NA) +
  scale_fill_manual(values = colpastel(n_map_colours), na.value = "#CEAF65") +
  theme(panel.background = element_rect(fill = "#E8E8DC"),
        plot.background = element_rect(fill = "#E8E8DC"),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks = element_blank(),
        text = element_blank())

# Curved country legend for the poster ----
# One row per country of origin; every country gets the same bar height (3).
# The piped as.data.frame() names the column ".", which the code below uses.
Country <- unique(Top100$`Pays d'origine`) %>%
  as.data.frame() %>%
  mutate(Freq = 3)

# Shorter label for the United States.
Country$. <- replace(
  Country$.,
  which(Country$. == "United States of America"),
  "USA"
)

Country_graph <- ggplot(Country, aes(., Freq)) +
  geom_col(aes(fill = .), position = "stack", width = 1) +
  scale_fill_manual(values = colpastel(7)) +
  theme(
    panel.border = element_blank(),
    panel.background = element_rect(fill = "transparent"),
    panel.grid.major = element_blank(),
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 15, vjust = 4, color = "black")
  ) +
  # The negative lower limit leaves an empty disc in the middle of the ring.
  scale_y_continuous(limits = c(-20, max(Country$Freq))) +
  coord_curvedpolar()

# Export the map and the legend for external composition.
ggsave(file = "Map_graph.svg", plot = Map_graph, width = 10, height = 8)
ggsave(file = "MCountry_graph.png", plot = Country_graph, width = 10, height = 8)

# Country mix of grade-A films for the year of each Top 100 entry ----
# Grade-A films from the Top 100 countries (the USA appears as "USA" in
# `Country`, hence the extra full name in the filter).
Année <- CritiqueFilm %>%
  select(Année, `Pays d'origine`, Grade) %>%
  filter(Grade == "A", `Pays d'origine` %in% (c(Country$., "United States of America"))) %>%
  arrange(Année) %>%
  mutate(ID = row_number())

# For every Top 100 row (ID = its position), collect the grade-A films that
# share its `Année` value. The pieces are built in a list and bound once, which
# avoids growing a data frame inside a loop and also works when Top100 has
# fewer than two rows.
AnnéeTable <- lapply(seq_len(nrow(Top100)), function(rank_id) {
  Année %>%
    filter(Année == Top100$Année[rank_id]) %>%
    mutate(ID = rank_id)
}) %>%
  bind_rows()

# Ring of 100 % stacked bars: one bar per Top 100 film, filled by country.
AnnéeGraph <- ggplot(AnnéeTable) +
  geom_bar(aes(x = ID, fill = `Pays d'origine`), position = "fill") +
  scale_fill_manual(values = colpastel(7)) +
  coord_polar() +
  scale_x_continuous(limits = c(-4, 104)) +
  scale_y_continuous(limits = c(-7, 1)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.major = element_blank())

ggsave(file = "AnnéeGraph.svg", plot = AnnéeGraph, width = 10, height = 8)

# Ring showing, for each Top 100 film, the categories graded 5 ----
# Long format: one row per film and rating category.
Category <- Top100 %>%
  select(
    `English Title`, ID, Scénario, `Acteurs / Personnages`,
    `Ambiance / Concept`, `Aspect Visuel`, `Aspect Sonore`
  ) %>%
  pivot_longer(
    cols = c(
      Scénario, `Acteurs / Personnages`, `Ambiance / Concept`,
      `Aspect Visuel`, `Aspect Sonore`
    ),
    names_to = "Category",
    values_to = "Grade"
  )

# Number the categories in order of first appearance; the number is the radial
# position of the category on the chart.
category_names <- unique(Category$Category)
category_lookup <- data.frame(
  Category = category_names,
  CategoryID = as.numeric(seq_along(category_names))
)

# Attach the radial position and keep only the top-graded cells.
Category <- merge(Category, category_lookup, by = "Category") %>%
  filter(Grade == 5)

Category_graph <- ggplot() +
  geom_bin2d(
    data = Category,
    aes(x = ID, y = CategoryID, fill = Category),
    binwidth = c(1, 1)
  ) +
  coord_polar() +
  xlim(c(-4, 104)) +
  ylim(c(-20, 6)) +
  scale_fill_manual(values = colpastel(5)) +
  theme(
    panel.border = element_blank(),
    panel.background = element_rect(fill = "transparent"),
    panel.grid.major = element_blank(),
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 15, vjust = 4, color = "white")
  )

ggsave(file = "Category_graph.svg", plot = Category_graph, width = 10, height = 8)

# Decade ring of the Top 100 poster ----
# One row per Top 100 film, ordered by decade. Within each decade `label` is
# first the running position of the film and `Count` the size of the decade.
# A grouped mutate() is used because summarise() returning several rows per
# group is deprecated in recent dplyr releases.
Décénie <- Top100 %>%
  transmute(Décénie = as.character(Décénie)) %>%
  arrange(Décénie) %>%
  group_by(Décénie) %>%
  mutate(label = row_number(),
         Count = n()) %>%
  ungroup() %>%
  # The decade name is printed only on the first film of a decade, and only
  # for decades with at least three films; every other row gets NA.
  mutate(Freq = 1,
         ID = row_number(),
         label = ifelse(Count >= 3 & label == 1, Décénie, NA))

# Ring of unit-height bars coloured by decade, with the decade name following
# the curve of the ring.
Décéniegraph <- ggplot(data = Décénie, aes(ID, Freq)) +
  geom_col(aes(fill = Décénie), position = "stack", width = 1.1) +
  scale_fill_manual(values = colpastel(7)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  xlim(c(-4, 104)) +
  # The negative lower limit leaves an empty disc in the middle of the ring.
  scale_y_continuous(limits = c(-15, max(Décénie$Freq))) +
  geom_textpath(aes(x = ID, y = Freq, label = label),
                vjust = -0.8, hjust = 1, color = "white", size = 3,
                inherit.aes = FALSE) +
  coord_polar()

ggsave(file = "Décéniegraph.svg", plot = Décéniegraph, width = 10, height = 8)

## Warning: position_stack requires non-overlapping x intervals

## Warning: Removed 95 rows containing missing values (geom_textpath).

# Radial film-title labels ----
# Text angle of each film, derived linearly from its position on the ring.
angle <- 77 - 333 * (Top100$ID) / 100

# Labels whose angle is below -90 are rotated by 180 degrees and
# right-aligned; their text is also written in the reverse order
# (title first, then `Année`).
Top100$hjust <- as.numeric(angle < -90)
Top100$angle <- angle + 180 * (angle < -90)
Top100$label <- ifelse(
  angle < -90,
  paste0(Top100$`English Title`, " -", Top100$Année),
  paste0(Top100$Année, "- ", Top100$`English Title`)
)

Top100$Décénie <- as.character(Top100$Décénie)

# Outer radius of the ring; Top100 has no `y` column, so aes(y = y) reads it.
y <- 6

Titlegraph <- ggplot(Top100, aes(x = ID, y = y)) +
  geom_col(aes(fill = Décénie), position = "stack", width = 1.1, alpha = .3) +
  geom_text(
    aes(label = label, y = 0.2, hjust = hjust, angle = angle),
    size = 1
  ) +
  xlim(c(-4, 104)) +
  scale_y_continuous(limits = c(-10, y)) +
  scale_fill_manual(values = colpastel(7)) +
  coord_polar() +
  theme(
    panel.border = element_blank(),
    panel.background = element_rect(fill = "transparent"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 15, vjust = 4, color = "white")
  )

ggsave(file = "Titlegraph.svg", plot = Titlegraph, width = 10, height = 8)

## Warning: position_stack requires non-overlapping x intervals

# Radial director labels ----
# The ten directors with the most Top 100 films, each given a palette colour.
Réal <- as.data.frame(table(Top100$Réalisateur))
names(Réal) <- c("Réalisateur", "Freq")
Réal <- Réal %>%
  arrange(desc(Freq)) %>%
  head(10)
Réal$Col <- colpastel(10)

# Full outer join: films by other directors get NA in `Freq` and `Col`.
Top100 <- merge(Top100, Réal, by = "Réalisateur", all = TRUE)

# NOTE(review): `Col` is mapped as a discrete variable and then recoloured by
# scale_color_manual(), so the hex codes stored in `Col` are not themselves
# the colours drawn - confirm this is intended.
Réalgraph <- ggplot(Top100, aes(x = ID, y = y)) +
  geom_text(
    aes(label = Réalisateur, y = 0.2, hjust = hjust, angle = angle, col = Col),
    size = 1
  ) +
  xlim(c(-4, 104)) +
  scale_y_continuous(limits = c(-10, y)) +
  scale_color_manual(values = colpastel(11)) +
  coord_polar() +
  theme(
    panel.border = element_blank(),
    panel.background = element_rect(fill = "transparent"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 15, vjust = 4, color = "white")
  )

ggsave(file = "Réalgraph.svg", plot = Réalgraph, width = 10, height = 8)


# Top100 <- merge(Top100,
#       world %>% as.data.frame() %>% select(sovereignt,iso_a2) %>% unique() %>% `colnames<-`(c("Pays d'origine","iso")),
#       by="Pays d'origine")

Me and the Cinema

Théotime Bourgeois

25 October, 2022