library(readxl)
library(tibble)
library(ggplot2)
library(tidyverse)
library(tm)
library(showtext)
library(lubridate)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(officer)
library(dplyr)
library(showtext)
library(tidyr)
library(knitr)
library(kableExtra)
library(cowplot)
library(colorspace)
library(ggrepel)
library(sf)
#library(tmap)    # for static and interactive maps
library(leaflet) # for interactive maps
#library(spData)
library(rnaturalearth)
library(leaflet.extras)
library(sp)
library(wbstats)
library(formattable)
library(rvest)
library(XML)
library(BBmisc)
library(xml2)
library(fmsb)
library(colormap)
library(circlize)
library(networkD3)
library(influential) #to create Sankey Diagram
library(igraph) #to create Sankey Diagram
library(oce) #to create Sankey Diagram
library(ggraph) #to create Sankey Diagram
library(devtools) #to add some external libraries
library(addTextLabels)
library(openxlsx)
library(data.tree) #to create a hierarchy
library(htmlwidgets) #to save interative graphs
library(circlepackeR) #to plot circles
library(geomtextpath)
#devtools::install_github("jeromefroe/circlepackeR") # If needed

# Register the Averta fonts used by every plot in this report.
# font_paths() expects a *directory* to add to the font search path, not a
# font file, so the fonts folder is added once.
font_paths("/Users/theotimebourgeois/Library/Fonts")

# The plots request family = "AvertaPE-Black" / "AvertaPE-Regular", so the
# fonts must be registered under exactly those family names.
font_add(family = "AvertaPE-Black",
         regular = "AvertaPE-Black.otf")
font_add(family = "AvertaPE-Regular",
         regular = "AvertaPE-Regular.otf")

# Legacy aliases: the families were previously registered with the ".otf"
# suffix; keep them so any code still using those names keeps working.
font_add(family = "AvertaPE-Black.otf",
         regular = "AvertaPE-Black.otf")
font_add(family = "AvertaPE-Regular.otf",
         regular = "AvertaPE-Regular.otf")

# House colours (hex codes) shared by all charts and tables.
purple <- "#00051E" # formerly "#2C2C54"
pink   <- "#A40E4C"
blue   <- "#2E86AB"
yellow <- "#FF9C00"
lila   <- "#E3DFFF"
brown  <- "#C3979F"
grey   <- "#BFBFBF"
white  <- "#FFFFFF"

# Ready-made colour sets, named after their intended number of classes.
mycols2 <- c(blue, purple)
mycols3 <- c(purple, pink, blue)
mycols4 <- c(purple, pink, blue, blue, yellow)
mycols5 <- c(white, blue, purple)

# Full palette, in order of preference.
allcols <- c(purple, blue, pink, yellow, lila, brown, grey)


#' Build a palette of `nbcol` colours from the house palette `allcols`.
#'
#' @param nbcol Number of colours wanted (a single non-negative number).
#' @return A character vector of `nbcol` hex colours. Colours are taken from
#'   `allcols` in order; when more colours are requested than `allcols`
#'   holds, the palette is recycled instead of being padded with `NA`.
mypal <- function(nbcol) {
  stopifnot(is.numeric(nbcol), length(nbcol) == 1L, !is.na(nbcol), nbcol >= 0)
  # rep_len() handles both edge cases that `allcols[1:nbcol]` got wrong:
  # nbcol = 0 (1:0 is c(1, 0), which returned one colour) and
  # nbcol > length(allcols) (which returned NA colours).
  rep_len(allcols, nbcol)
}

Introduction

Le cinéma ne dit pas autrement les choses, il dit autre chose.
The cinema does not say things differently, it says something else.

Éric Rohmer, French Director

First of all, the following analysis is purely subjective and is in no way representative of global consumption behaviour.
It is, however, representative of my own cinema consumption since I was 20 years old. The data has been meticulously collected to produce this report, which is a snapshot of my cinephilia at a given moment, with the biases that this implies: I am a young Frenchman who has been more or less influenced in his choice of films and who obviously has tastes that cannot be explained but that can be identified.

Overview of my Database

# Both sheets live in the same workbook: the watch list ("Film à voir") and
# the rated films ("Notation").
critique_xlsx_path <- "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx"
ToSeeFilm <- read_excel(critique_xlsx_path, sheet = "Film à voir")

# Cleaning
CritiqueFilm <- read_excel(critique_xlsx_path, sheet = "Notation")
# Batman films are filed under the "DC" saga.
CritiqueFilm$Saga <- replace(
  CritiqueFilm$Saga,
  which(CritiqueFilm$Saga == "Batman"),
  "DC"
)
# Films distributed by DreamWorks Animation are filed under "DreamWorks".
CritiqueFilm$Saga <- replace(
  CritiqueFilm$Saga,
  which(CritiqueFilm$`Maison de distribution` == "DreamWorks Animation"),
  "DreamWorks"
)

# GlobalInfos <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",sheet = "Bilan")

# Merge the rated films and the watch list into one table:
#   - rows without a title are dropped;
#   - a film counts as seen when it has a "Scénario" grade;
#   - a missing cumulated grade falls back to twice the press rating;
#   - "Mois" is the numeric release month.
# NamesFilm$Année <- as.numeric(format(NamesFilm$`Date de sortie`, format = "%Y"))
# NamesFilm$Décénie <- round(NamesFilm$Année/10,0)*10
NamesFilm <- bind_rows(CritiqueFilm, ToSeeFilm) %>%
  filter(!is.na(`Titre du film`)) %>%
  mutate(
    Seen = !is.na(Scénario),
    `Notes cummulées` = coalesce(`Notes cummulées`, `Note Presse` * 2),
    Mois = as.numeric(format(`Date de sortie`, "%m"))
  )

# Non-French films that still lack an IMDB identifier.
IMDB <- NamesFilm %>%
  filter(is.na(`IMDB ID`) & `Pays d'origine` != "France") %>%
  select(`English Title`, Année, Réalisateur, `IMDB ID`)


# Number of rated films
count_movies_seen <- length(CritiqueFilm[["Titre du film"]])

# Number of films still to see
count_movies_tosee <- length(ToSeeFilm[["Titre du film"]])

# Total number of films
count_total <- count_movies_tosee + count_movies_seen


# Director summary, restricted to directors with at least 3 rated films:
#   - best director according to our ratings
#   - best director according to the press
#   - director we rate highest relative to the press ("Surcote")
Director_table <- as.data.frame(table(CritiqueFilm$Réalisateur))
colnames(Director_table)[1] <- "Director"
Director_table <- Director_table[Director_table$Freq >= 3, ]

# Per-director means, computed in one pass and looked up by director name.
# This replaces a `1:length()` loop that misbehaved when no director passed
# the 3-film filter.
director_names <- as.character(Director_table$Director)
Director_table$`Note Presse` <- as.numeric(
  tapply(CritiqueFilm$`Note Presse`, CritiqueFilm$Réalisateur,
         mean, na.rm = TRUE)[director_names]
)
Director_table$`Nos notes` <- as.numeric(
  tapply(CritiqueFilm$`Nos notes`, CritiqueFilm$Réalisateur,
         mean, na.rm = TRUE)[director_names]
)

# Positive when we rate the director higher than the press does.
Director_table$Surcote <- Director_table$`Nos notes` - Director_table$`Note Presse`

# which.max() ignores NA/NaN means (a director whose films all lack a
# rating) and returns the first winner on ties; the trailing [1] keeps the
# result a single value (NA when the table is empty).
Best_director_forme <- as.character(
  Director_table$Director[which.max(Director_table$`Nos notes`)]
)[1]
Best_director_forpresse <- as.character(
  Director_table$Director[which.max(Director_table$`Note Presse`)]
)[1]
Surcote_director <- as.character(
  Director_table$Director[which.max(Director_table$Surcote)]
)[1]


# Actor summary:
#   - best, second best and third best actor (minimum 3 films)
#   - most prolific actor

# Stack the three actor columns into one long table with a single "Acteur"
# column, keeping the grades and film metadata for each credit.
Acteur_merge <- bind_rows(lapply(
  c("Acteur 1", "Acteur 2", "Acteur 3"),
  function(actor_column) {
    select(CritiqueFilm, Acteur = all_of(actor_column), `Nos notes`, Grade,
           `Note Presse`, `Emoji Pays`, `Date de sortie`)
  }
))

# Number of credits per actor, most prolific first.
Acteur <- as.data.frame(table(Acteur_merge$Acteur))
colnames(Acteur) <- c("Acteur", "Freq")
Acteur <- Acteur[order(-Acteur$Freq), ]
Acteur_Max <- as.character(Acteur$Acteur)[1]

# Mean of our grades and of the press grades per actor (rounded to 1 digit),
# computed in one pass and looked up by actor name.
actor_names <- as.character(Acteur$Acteur)
Acteur$Notes <- round(as.numeric(
  tapply(Acteur_merge$`Nos notes`, Acteur_merge$Acteur,
         mean, na.rm = TRUE)[actor_names]
), 1)
Acteur$Presse <- round(as.numeric(
  tapply(Acteur_merge$`Note Presse`, Acteur_merge$Acteur,
         mean, na.rm = TRUE)[actor_names]
), 1)

Acteur$Total <- Acteur$Notes + Acteur$Presse
# "Minimum 3 films" means Freq >= 3: the previous `Freq > 3` silently
# required 4 films, contradicting the documented threshold and the one
# applied to directors.
Acteur <- Acteur[Acteur$Freq >= 3, ]
Acteur <- Acteur[order(-Acteur$Total), ]
# Top three actors by combined score (NA-padded if fewer than three qualify).
Best_actor <- as.character(Acteur$Acteur[1:3])

# Best release year according to the rated films (years with >= 5 films).
Year_data <- as.data.frame(table(CritiqueFilm$Année))
colnames(Year_data)[1] <- "Year"

# Mean cumulated grade per year, computed in one pass and looked up by year
# label (replaces a `1:length()` loop that misbehaved on an empty table).
Year_data$Note <- as.numeric(
  tapply(CritiqueFilm$`Notes cummulées`, CritiqueFilm$Année,
         mean, na.rm = TRUE)[as.character(Year_data$Year)]
)

Year_data <- Year_data[Year_data$Freq >= 5, ]
# which() drops years whose mean is NA/NaN; a bare `==` comparison would
# have leaked NA entries into Best_year. Ties are all kept, as before.
Best_year <- as.character(
  Year_data$Year[which(Year_data$Note == max(Year_data$Note, na.rm = TRUE))]
)

# Best month to go and see a film at the cinema in France.
# Tabulating the vector (rather than a one-column data frame) guarantees the
# label column is called "Var1"; table() on a data frame may name it after
# the column instead.
# NOTE(review): assumes CritiqueFilm has a numeric `Mois` column; only
# NamesFilm$Mois is computed in this file, so confirm it comes from the workbook.
Month <- as.data.frame(table(CritiqueFilm$Mois))

# Mean of our grades per release month, looked up by month label.
Month$Grade <- as.numeric(
  tapply(CritiqueFilm$`Nos notes`, CritiqueFilm$Mois,
         mean, na.rm = TRUE)[as.character(Month$Var1)]
)

# Var1 is a factor: convert through character to get the month number itself.
# as.numeric() on the factor returns the level code, which is only the month
# number when all 12 months are present in the data.
best_month_number <- as.numeric(as.character(
  Month$Var1[which(Month$Grade == max(Month$Grade, na.rm = TRUE))]
))
Best_Month <- month.name[best_month_number]


# Best distribution company according to our grades
# (companies with more than 3 rated films).
Distri <- select(CritiqueFilm, `Maison de distribution`, `Nos notes`)
Distri_table <- as.data.frame(table(Distri$`Maison de distribution`))
Distri_table <- Distri_table[Distri_table$Freq > 3, ]

# Mean grade per company, computed in one pass. The previous `1:length()`
# loop raised an error when no company passed the filter above.
Distri_table$Note <- as.numeric(
  tapply(Distri$`Nos notes`, Distri$`Maison de distribution`,
         mean, na.rm = TRUE)[as.character(Distri_table$Var1)]
)

# which() ignores companies whose mean is NA/NaN; ties are all kept.
Best_distri <- as.character(
  Distri_table$Var1[which(Distri_table$Note == max(Distri_table$Note, na.rm = TRUE))]
)

# Parent company with the largest share of the rated films.

Distri <- CritiqueFilm %>% select(`Maison mère`, `Nos notes`)
Distri_table <- as.data.frame(table(Distri$`Maison mère`))
# The "France" bucket is excluded before ranking by number of films.
Distri_table <- Distri_table[Distri_table$Var1 != "France", ]
Distri_table <- Distri_table[order(-Distri_table$Freq), ]

Most_Distri <- as.character(Distri_table$Var1[1])
# Share (in %) of the rated films that belong to that parent company.
Most_Distri_Percent <- round(
  100 * sum(CritiqueFilm$`Maison mère` == Most_Distri, na.rm = TRUE) / count_movies_seen,
  1
)

# Progress (in %): rated films over rated films plus films still to see.
Total_percent <- round(
  100 * count_movies_seen / (count_movies_tosee + count_movies_seen),
  1
)


# Format a duration given in minutes as "<hours>h<minutes>".
# Minutes are zero-padded and rounding is done on the total, so 65 minutes
# gives "1h05" (not "1h5") and 119.7 minutes gives "2h00" (not "1h60").
format_hours_minutes <- function(minutes) {
  total_minutes <- round(minutes)
  sprintf("%dh%02d",
          as.integer(total_minutes %/% 60),
          as.integer(total_minutes %% 60))
}

# Average film duration (the formatting assumes `Durée` is in minutes).
Duration <- mean(CritiqueFilm$Durée, na.rm = TRUE)
Duration_txt <- format_hours_minutes(Duration)

# Days elapsed since the database was started.
Beginning <- as.Date("2019-09-12")
Count_days <- as.numeric(Sys.Date() - Beginning)

# Average viewing time per day since the start, and films per day.
Duration_seen <- (Duration * count_movies_seen / Count_days)
Duration_txt_seen <- format_hours_minutes(Duration_seen)
Films_per_day <- round(count_movies_seen / Count_days, 2)

# Rate at which new films enter the list, estimated from the 2018-2019
# releases already rated.
# NOTE(review): two release years are divided by 360 days - confirm the
# intended denominator.
Filmtoaddparday <- sum(CritiqueFilm$Année == 2018 | CritiqueFilm$Année == 2019, na.rm = TRUE) / 360

# Days needed to clear the watch list at the current pace, without and with
# new films being added.
Nb_day <- count_movies_tosee * Duration / Duration_seen
Nb_day2 <- round(Nb_day + Nb_day * Filmtoaddparday, 0)

# Turn both estimates into projected end dates.
Nb_day <- Sys.Date() + Nb_day
Nb_day2 <- Sys.Date() + Nb_day2

# Subtitle reused by the charts.
Sub <- paste0("Based on ", count_movies_seen, " movies seen")

As a lifelong film enthusiast, I created a database in September 2019 (1139 days ago) allowing me to track the films I watch and to structure my cinephilia.
So I have seen 856 films in the last few years and I have a list of over 1328 films to see. This analysis is therefore evolving!
Who are my favourite directors? What are the best films according to me and according to the press? What kind of films are the most represented? Which actor is the most present in my filmography? All these questions will be answered in this report! I will start by giving you an overview of my film consumption and then go into more detail in the dedicated sections.

The recipe for a good film? Still unknown but if I had to summarize the 856 films I have seen, this is what I can say:

  • The best director according to our ratings (minimum 3 films) : Gore Verbinski
  • The best director according to the press (minimum 3 films) : Peter Jackson
  • The most underrated director (minimum 3 films) : Louis Leterrier
  • Best actor (minimum 3 films) : Viggo Mortensen
  • Second best actor (minimum 3 films) : Tim Allen
  • Third best actor (minimum 3 films) : Leonardo DiCaprio
  • Most prolific actor: Hugh Jackman
  • The best year according to the rated films : 1994
  • The best month to see a film in France: May
  • The best distribution company according to the scores : Pixar
  • Distribution company with the highest market share : Disney (25.2%)
  • Percentage of advancement : 39.2%
  • Average duration of a film : 1h51
  • Daily time spent watching films : 1h23 or 0.75 film per day
  • End date if no film is added to my list again : 2027-08-27
  • End date if I add films at the same rate as today : 2029-03-28

Explanation of my scoring system


In order to establish a ranking of films, actors, directors… I had to decide on some rating criteria that will allow me to evaluate the main elements that make up a film. So here are the 5 criteria I rate out of 5:

# Draw up to `n` titles from `pool` that scored 5 on `criterion`.
# which() drops NA comparisons so no NA title can be drawn, and the draw
# size is capped so a pool with fewer than `n` candidates does not error.
pick_examples <- function(pool, criterion, n = 3) {
  candidates <- pool$`English Title`[which(pool[[criterion]] == 5)]
  sample(candidates, min(n, length(candidates)))
}

# Join titles as "A, B or C" (gracefully handles 0, 1 or 2 titles).
format_examples <- function(titles) {
  if (length(titles) <= 1) {
    return(paste0(titles, collapse = ""))
  }
  paste0(paste(head(titles, -1), collapse = ", "), " or ", tail(titles, 1))
}

# Candidate films for the examples: grade "A", press rating >= 4,
# Saga == "Saga", an English title and not French. which() keeps only rows
# where the whole condition is TRUE (rows with NA are not turned into
# all-NA rows).
eligible_rows <- which(
  CritiqueFilm$Grade == "A" &
    CritiqueFilm$`Note Presse` >= 4 &
    CritiqueFilm$Saga == "Saga" &
    !is.na(CritiqueFilm$`English Title`) &
    CritiqueFilm$`Pays d'origine` != "France"
)
Explication_Sample <- CritiqueFilm[eligible_rows, ]
Explication_Sample <- select(Explication_Sample, `English Title`, Scénario, `Acteurs / Personnages`, `Ambiance / Concept`, `Aspect Visuel`, `Aspect Sonore`)
Explication_Sample <- as.data.frame(Explication_Sample)

# For each criterion: draw the examples, remove them from the pool so a film
# is quoted only once, then build the sentence fragment. The scenario picks
# were previously pasted into one string *before* the exclusion, so they
# were never removed from the pool.
picked_titles <- pick_examples(Explication_Sample, "Scénario")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Scenario <- format_examples(picked_titles)

picked_titles <- pick_examples(Explication_Sample, "Acteurs / Personnages")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Acteur <- format_examples(picked_titles)

picked_titles <- pick_examples(Explication_Sample, "Ambiance / Concept")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Ambiance <- format_examples(picked_titles)

picked_titles <- pick_examples(Explication_Sample, "Aspect Visuel")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Visuel <- format_examples(picked_titles)

picked_titles <- pick_examples(Explication_Sample, "Aspect Sonore")
Explication_Sample <- Explication_Sample[!(Explication_Sample$`English Title` %in% picked_titles), ]
Explication_Sample_Sonore <- format_examples(picked_titles)

# Average grade per rating criterion, rounded to 2 digits.
# Names are the English labels shown in the report, values are the matching
# columns of CritiqueFilm; ID is the row position (1 to 5).
criterion_columns <- c(
  "Scenario" = "Scénario",
  "Actors and characters" = "Acteurs / Personnages",
  "Atmosphere and concept" = "Ambiance / Concept",
  "Visual aspect" = "Aspect Visuel",
  "Sound aspect" = "Aspect Sonore"
)

scoring_system <- data.frame(
  Categories = names(criterion_columns),
  Grade = round(
    vapply(
      criterion_columns,
      function(column) mean(CritiqueFilm[[column]], na.rm = TRUE),
      numeric(1),
      USE.NAMES = FALSE
    ),
    2
  ),
  ID = 1:5,
  stringsAsFactors = FALSE
)

# Lollipop chart of the average grade per criterion. The grey horizontal
# line is the mean of the five averages; stems start at the lower axis
# limit (3).
scoring_system_graph <- ggplot(scoring_system, aes(x = Categories, y = Grade)) +
  geom_hline(yintercept = mean(scoring_system$Grade), colour = grey) +
  geom_segment(aes(x = ID, xend = ID, y = 3, yend = Grade), colour = blue) +
  geom_point(shape = 21, size = 3, stroke = 2, colour = purple, fill = "white") +
  geom_text(
    aes(label = Grade),
    family = "AvertaPE-Regular",
    size = 3,
    hjust = 0.5,
    vjust = -1.5,
    check_overlap = TRUE
  ) +
  # Fixed category order; labels are dodged over two rows to avoid overlap.
  scale_x_discrete(
    limits = c("Scenario", "Actors and characters", "Atmosphere and concept", "Visual aspect", "Sound aspect"),
    guide = guide_axis(n.dodge = 2)
  ) +
  ylim(3, 4.5) +
  labs(title = "Average of grades per Categorie") +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "bottom"
  )

scoring_system_graph

  • The scenario: Essential for a good film, it keeps us on the edge of our seats, makes us passionate, questions us and is in my opinion the most important. A film with an impeccable visual quality without a script will remain a bad film. Here are for example 3 films that I evaluated with an excellent script: 1917, Shutter Island or Forrest Gump

  • Actors and characters: This category is an indissociable part of the rating system and allows us to identify whether the casting is successful and therefore whether the actors are good and correspond perfectly to the character they play. This category is obviously rated higher than the others since the actors contribute most to the credibility of a film and most of the time give their best as in : Gone Girl, Interstellar or Hachi: A Dog’s Tale

  • Atmosphere and concept: Each film has its own universe that can transport us and sometimes we want to see more… or not! The atmosphere of the film allows us to stay hooked to the plot and to feel unique emotions. The concept allows innovation in an environment that we think is already saturated but we will see that many recent films have really new concepts like : The Grand Budapest Hotel, The Green Mile or The Prestige

  • Visual aspect: The aesthetics of the film is a central element. The visual aspect consists in evaluating the visual beauty of the film, its risk-taking, its camera movements, its editing, its special effects, its photography etc. Here are some films with an interesting visual aspect: Ready Player One, The Curious Case of Benjamin Button or Your Name.

  • Sound aspect: Finally, the sound aspect echoes the atmosphere of the film as it includes both the soundtrack and all the work done on sound, sound effects etc. to make it all coherent. Although the soundtrack has a central place in the evaluation of this criterion, some films enjoy quite incredible sound effects that sometimes absorb the musical theme. Here are 3 films with impeccable sound effects: Titanic, The Imitation Game or Whiplash

Top of my movies

# Ranking of the best-rated films among those rated by "Théotime".
Top <- 100

# filter() drops rows where `Noté par` is NA; logical subsetting with
# str_detect() turned those rows into all-NA rows that could end up in the
# ranking.
TopFilms <- CritiqueFilm %>%
  filter(str_detect(`Noté par`, "Théotime")) %>%
  select(Title = `English Title`, Year = Année, Country = `Emoji Pays`,
         Director = Réalisateur, Grade = `Notes cummulées`) %>%
  arrange(desc(Grade)) %>%
  head(Top)

TopFilms$Country[TopFilms$Country == "United States of America"] <- "USA"
# Rank by actual row count: `1:Top` raised an error whenever fewer than
# `Top` films matched.
TopFilms$Rank <- seq_len(nrow(TopFilms))
TopFilms <- TopFilms %>% select(Rank, everything())

# Scrollable HTML table; the Grade column is shaded from blue to purple.
TopFilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("c", "l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(6, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
Rank Title Year Country Director Grade
1 Forrest Gump 1994 🇺🇸 Robert Zemeckis 9.6
2 The Dark Knight 2008 🇺🇸 Christopher Nolan 9.5
3 The Lion King 1994 🇺🇸 Roger Allers 9.5
4 Joker 2019 🇺🇸 Todd Philips 9.5
5 The Green Mile 2000 🇺🇸 Frank Darabont 9.5
6 The Lord of the Rings: The Fellowship of the Ring 2001 🇺🇸 Peter Jackson 9.5
7 The Lord of the Rings: The Two Towers 2002 🇺🇸 Peter Jackson 9.5
8 The Lord of the Rings: The Return of the King 2003 🇺🇸 Peter Jackson 9.5
9 Pulp Fiction 1994 🇺🇸 Quentin Tarantino 9.5
10 1917 2020 🇬🇧 Sam Mendes 9.4
11 Bohemian Rhapsody 2018 🇺🇸 Bryan Singer 9.4
12 Dune 2021 🇺🇸 Denis Villeneuve 9.4
13 Interstellar 2014 🇺🇸 Christopher Nolan 9.4
14 Spider-Man: Into the Spider-Verse 2018 🇺🇸 Peter Ramsey 9.4
15 Zack Snyder’s Justice League 2021 🇺🇸 Zack Snyder 9.3
16 Soul 2020 🇺🇸 Pete Docter 9.3
17 Kingsman: The Secret Service 2015 🇺🇸 Matthew Vaughn 9.3
18 Léon: The Professional 1994 🇫🇷 Luc Besson 9.3
19 Slumdog Millionaire 2009 🇬🇧 Danny Boyle 9.3
20 Titanic 1998 🇺🇸 James Cameron 9.3
21 Toy Story 3 2010 🇺🇸 Lee Unkrich 9.2
22 How to Train Your Dragon 2010 🇺🇸 Dean DeBlois 9.2
23 Inception 2010 🇺🇸 Christopher Nolan 9.2
24 Jurassic Park 1993 🇺🇸 Steven Spielberg 9.2
25 Guardians of the Galaxy 2014 🇺🇸 James Gunn 9.2
26 Back to the Future 1985 🇺🇸 Robert Zemeckis 9.2
27 Star Wars: Episode III – Revenge of the Sith 2005 🇺🇸 George Lucas 9.2
28 Star Wars : Episode V – The Empire Strikes Back 1980 🇺🇸 Irvin Kershner 9.2
29 Star Wars: Episode VI – Return of the Jedi 1983 🇺🇸 Richard Marquand 9.2
30 Hacksaw Ridge 2016 🇺🇸 Mel Gibson 9.2
31 Coco 2017 🇺🇸 Lee Unkrich 9.1
32 Green Book 2018 🇺🇸 Peter Farrelly 9.1
33 Harry Potter and the Deathly Hallows: Part 2 2011 🇺🇸 David Yates 9.1
34 Skyfall 2012 🇬🇧 Sam Mendes 9.1
35 The Curious Case of Benjamin Button 2009 🇺🇸 David Fincher 9.1
36 Rise of the Planet of the Apes 2011 🇺🇸 Rupert Wyatt 9.1
37 The Pianist 2002 🇫🇷 Roman Polanski 9.1
38 The Shawshank Redemption 1995 🇺🇸 Frank Darabont 9.1
39 Guardians of the Galaxy Vol. 2 2017 🇺🇸 James Gunn 9.1
40 Avengers: Infinity War 2018 🇺🇸 Frères Russo 9.1
41 Spider-Man: No Way Home 2021 🇺🇸 Jon Watts 9.1
42 Parasite 2019 🇰🇷 Bong Joon-ho 9.1
43 Rogue One: A Star Wars Story 2016 🇺🇸 Gareth Edwards 9.1
44 The Incredibles 2004 🇺🇸 Brad Bird 9.0
45 Incredibles 2 2018 🇺🇸 Brad Bird 9.0
46 The Great Gatsby 2013 🇺🇸 Baz Luhrmann 9.0
47 Casino Royale 2006 🇬🇧 Martin Campbell 9.0
48 Kick-Ass 2010 🇺🇸 Matthew Vaughn 9.0
49 Life of Pi 2012 🇺🇸 Ang Lee 9.0
50 Ford v Ferrari 2019 🇺🇸 James Mangold 9.0
51 Marvel’s The Avengers 2012 🇺🇸 Joss Whedon 9.0
52 Avengers: Endgame 2019 🇺🇸 Frères Russo 9.0
53 Pirates of the Caribbean: The Curse of the Black Pearl 2003 🇺🇸 Gore Verbinski 9.0
54 Spider-Man 2002 🇺🇸 Sam Raimi 9.0
55 Your Name. 2016 🇯🇵 Makoto Shinkai 9.0
56 X-Men: Days of Future Past 2014 🇺🇸 Bryan Singer 9.0
57 War for the Planet of the Apes 2017 🇺🇸 Matt Reeves 8.9
58 Limitless 2011 🇺🇸 Neil Burger 8.9
59 Sherlock Holmes 2010 🇺🇸 Guy Ritchie 8.9
60 Star Trek Into Darkness 2013 🇺🇸 J. J. Abrams 8.9
61 Knives Out 2019 🇺🇸 Rian Johnson 8.9
62 Aladdin 1992 🇺🇸 John Musker et Ron Clements 8.9
63 Toy Story 1996 🇺🇸 John Lasseter 8.9
64 WALL‐E 2008 🇺🇸 Andrew Stanton 8.9
65 The Prestige 2006 🇺🇸 Christopher Nolan 8.9
66 The Batman 2022 🇺🇸 Matt Reeves 8.9
67 Charlie and the Chocolate Factory 2005 🇺🇸 Tim Burton 8.8
68 Monsters, Inc.  2002 🇺🇸 Pete Docter 8.8
69 Zootopia 2016 🇺🇸 Byron Howard 8.8
70 How to Train Your Dragon 2 2014 🇺🇸 Dean DeBlois 8.8
71 Gladiator 2000 🇺🇸 Ridley Scott 8.8
72 Harry Potter and the Deathly Hallows: Part 1 2010 🇺🇸 David Yates 8.8
73 No Time to Die 2021 🇬🇧 Cary Joji Fukunaga 8.8
74 Kingsman: The Golden Circle 2017 🇺🇸 Matthew Vaughn 8.8
75 Spirited Away 2002 🇯🇵 Hayao Miyazaki 8.8
76 Mad Max: Fury Road 2015 🇦🇺 George Miller 8.8
77 Pirates of the Caribbean: Dead Man’s Chest 2006 🇺🇸 Gore Verbinski 8.8
78 Back to the Future Part II 1989 🇺🇸 Robert Zemeckis 8.8
79 Shrek 2001 🇺🇸 Andrew Adamson 8.8
80 Spider-Man 2 2004 🇺🇸 Sam Raimi 8.8
81 Star Wars: Episode II – Attack of the Clones 2002 🇺🇸 George Lucas 8.8
82 Star Wars: Episode IV – A New Hope 1977 🇺🇸 George Lucas 8.8
83 The Grand Budapest Hotel 2014 🇺🇸 Wes Anderson 8.8
84 Whiplash 2014 🇺🇸 Damien Chazelle 8.8
85 Ratatouille 2007 🇺🇸 Brad Bird 8.7
86 Wreck‐It Ralph 2012 🇺🇸 Rich Moore 8.7
87 How to Train Your Dragon: The Hidden World 2019 🇺🇸 Dean DeBlois 8.7
88 Dunkirk 2017 🇺🇸 Christopher Nolan 8.7
89 Gone Girl 2014 🇺🇸 David Fincher 8.7
90 The Imitation Game 2014 🇺🇸 Morten Tyldum 8.7
91 Klaus 2019 🇪🇸 Sergio Pablos 8.7
92 Dawn of the Planet of the Apes 2014 🇺🇸 Matt Reeves 8.7
93 The Lion King 2019 🇺🇸 Jon Favreau 8.7
94 Le Visiteur du futur 2022 🇫🇷 François Descraques 8.7
95 The Matrix 1999 🇺🇸 Les Wachowski 8.7
96 Pirates of the Caribbean: At World’s End 2007 🇺🇸 Gore Verbinski 8.7
97 Play 2020 🇫🇷 Anthony Marciano 8.7
98 Back to the Future Part III 1990 🇺🇸 Robert Zemeckis 8.7
99 The Shining 1980 🇺🇸 Stanley Kubrick 8.7
100 Shutter Island 2010 🇺🇸 Martin Scorsese 8.7

My consumption over time

# Range of release years covered by the rated films.
YearMin <- min(CritiqueFilm$Année, na.rm = TRUE)
YearMax <- max(CritiqueFilm$Année, na.rm = TRUE)

# One row per year in the range. The column keeps the name that
# data.frame(YearMin:YearMax) used to generate.
Year_df <- data.frame(YearMin.YearMax = YearMin:YearMax)

# Number of films released each year, in the rated table and in the merged
# table. Vectorised: the previous loop also recomputed `Total` on every
# iteration.
Year_df$CritiqueFilm <- vapply(
  Year_df$YearMin.YearMax,
  function(year) sum(as.numeric(CritiqueFilm$Année == year), na.rm = TRUE),
  numeric(1)
)
Year_df$NamesFilm <- vapply(
  Year_df$YearMin.YearMax,
  function(year) sum(as.numeric(NamesFilm$Année == year), na.rm = TRUE),
  numeric(1)
)
# NOTE(review): NamesFilm is built from CritiqueFilm plus the watch list, so
# this sum presumably counts rated films twice - confirm the intent.
Year_df$Total <- Year_df$CritiqueFilm + Year_df$NamesFilm
Year_df <- as.data.frame(Year_df)

# Window of release years shown on the heat map.
YearMin_graph <- 1998
YearMax_graph <- 2022

# Our grades for the films released inside that window.
Year_Grade <- CritiqueFilm %>% select(Année, `Nos notes`)
Year_Grade <- Year_Grade[
  Year_Grade$Année >= YearMin_graph & Year_Grade$Année <= YearMax_graph,
]

# Headline figures: mean grade of the 2019 releases and lowest grade given.
Year_Grade_2019 <- round(
  mean(Year_Grade$`Nos notes`[Year_Grade$Année == 2019], na.rm = TRUE),
  1
)
Year_Grade_Min <- round(min(Year_Grade$`Nos notes`, na.rm = TRUE), 1)

# 2D histogram (one-year by third-of-a-point bins) of grade against release
# year, with a white linear trend line on top.
Year_Grade_graph <- ggplot(Year_Grade, aes(x = Année, y = `Nos notes`)) +
  geom_bin2d(binwidth = c(1, 1 / 3)) +
  scale_x_continuous(breaks = seq(YearMin_graph, YearMax_graph, 2)) +
  geom_smooth(method = lm, colour = white, se = FALSE) +
  scale_fill_gradient(low = purple, high = blue) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    x = "Year",
    y = "Grade",
    fill = "Count"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue),
    axis.line = element_line(colour = purple),
    panel.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none"
  )
Year_Grade_graph


This graph represents my film consumption since 1998, the year I was born. The lighter the colour, the more films I have seen with that rating in that period.
Since 2019, when I created my database, we see a greater diversity of bad and good films, with a tendency towards average scores overall, whereas the years before the creation of my file have higher average scores because they correspond to good films that “must” be seen.
Eventually, the aim will be to see more films over this period to complete each square from 1 to 5 in score and see a real trend that I imagine is decreasing.

# Films per decade, split by seen status (rows = decades, columns = status).
# Fixing the factor levels guarantees both a FALSE and a TRUE column, so the
# renaming below cannot fail when one status is absent from the data.
# NOTE(review): assumes NamesFilm has a `Décénie` column coming from the
# workbook; the line that derived it in this file is commented out.
Decades <- as.data.frame.matrix(
  table(NamesFilm$Décénie, factor(NamesFilm$Seen, levels = c(FALSE, TRUE)))
)
colnames(Decades) <- c("To see", "Seen")
Decades$Decades <- as.numeric(rownames(Decades))
Decades$Total <- as.numeric(Decades$`To see` + Decades$Seen)

# Two overlaid areas: all films in the database (back) and films already
# seen (front), with the seen count labelled at each decade.
Decades_graph <- ggplot(Decades) +
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Seen, fill = "Movies seen")) +
  # geom_label() does not support `check_overlap` (geom_text() only); the
  # argument was ignored with a warning, so it is dropped here.
  geom_label(aes(x = Decades, y = Seen, label = paste(Seen)),
             fill = purple,
             colour = white) +
  scale_fill_manual(values = c(purple, blue)) +
  scale_x_continuous(breaks = seq(1930, 2020, 10)) +
  labs(title = "Volume of films to be seen and films seen\naccording to recommendations",
       y = "Number of films", x = "Decade",
       fill = "Legend") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "bottom",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Decades_graph


This graph shows, for each decade of release, the total number of films in my database (films seen plus films still to see) and, in the foreground, the number of films I have already seen; the labels give the count of films seen per decade.
The gap between the two areas is the volume of recommended films that remain on my must-see list for that decade.
Eventually, the aim will be to close this gap by seeing more films from every decade.

# Work with the absolute gap between our grade and the press grade.
CritiqueFilm$Différence <- abs(CritiqueFilm$Différence)

# Films where we disagree most with the press; these get a label on the plot.
# Columns are referenced by bare name: `CritiqueFilm$...` inside filter()
# bypasses dplyr's data masking.
TopDiff <- filter(CritiqueFilm, Différence > 1.4)

# Hex-binned scatter of our grade against the press grade, with the linear
# trend (pink) and the y = x reference line (grey).
ggplot(CritiqueFilm, aes(`Note Presse`, `Nos notes`)) +
  geom_hex(binwidth = c(.2, .33), color = purple) +
  geom_smooth(col = pink, se = FALSE, method = "lm") +
  geom_abline(intercept = 0, slope = 1, color = grey) +
  # The label is mapped from the layer's own data; using `TopDiff$...`
  # inside aes() is discouraged by ggplot2 and triggers a warning.
  geom_label_repel(data = TopDiff, aes(label = `English Title`),
                   vjust = "inward", hjust = "inward",
                   family = "AvertaPE-Regular",
                   size = 8 / .pt) +
  scale_fill_gradient(low = purple, high = blue) +
  xlim(0.8, 5.3) +
  ylim(0.8, 5.3) +
  labs(title = "Rating of the film compared to the press ratings",
       subtitle = "Trend of overnotting") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))



My consumption

# One row per dated viewing, with its ISO week, day of month, month, and a
# flag telling whether it happened during the last 365 days.
Date_Evolution <- CritiqueFilm %>%
  select(Date = `Dernier visionnage`) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  mutate(Week = format(Date, format = "%V") %>% as.numeric(),
         Day = format(Date, format = "%d") %>% as.numeric(),
         Month = format(Date, format = "%m") %>% as.numeric(),
         LastYear = Date >= (Sys.time() - (365 * 24 * 60 * 60)))

# Shift the week numbers so that the current week lands at the right-hand
# end of the axis (sliding year); values above 52 wrap around.
current_week <- format(Sys.time(), "%V") %>% as.numeric()
Date_Evolution$Week <- Date_Evolution$Week + 53 - current_week
Date_Evolution$Week[Date_Evolution$Week > 52] <- Date_Evolution$Week[Date_Evolution$Week > 52] - 52
Date_Evolution$Week <- round(Date_Evolution$Week, 0)

# Number of viewings per shifted week, split by the last-year flag.
Date_Evolution_table <- Date_Evolution %>% select(Week, LastYear) %>% table() %>% as.data.frame()

# Smoothed weekly counts: all earlier viewings (white) against the last
# 365 days (yellow), on a dark background.
ggplot(Date_Evolution_table, aes(x = Week, y = Freq, group = LastYear, color = LastYear)) +
  geom_smooth(method = lm, formula = y ~ splines::bs(x, 7), se = FALSE) +
  labs(title = "Identify a decrease and gaps in my consumption",
       subtitle = "Film consumption over a year by week",
       color = "Timeline",
       x = "Week", y = "Count") +
  # `labels` is spelled out; the original `label` only worked through
  # partial argument matching.
  scale_color_manual(values = c(white, yellow), labels = c("Global", "This year")) +
  # The deprecated `panel.margin.x/y = NULL` settings (renamed
  # panel.spacing.*) had no effect and are dropped.
  theme(text = element_text(size = 12, family = "AvertaPE-Regular", colour = white),
        title = element_text(colour = white),
        panel.background = element_rect(fill = purple),
        plot.background = element_rect(fill = purple, color = purple),
        panel.grid.major = element_line(colour = purple),
        panel.grid.minor = element_line(colour = purple),
        panel.border = element_blank(),
        legend.text = element_text(colour = white),
        legend.title = element_text(colour = white),
        legend.position = "right",
        legend.background = element_blank(),
        legend.key = element_blank(),
        axis.text = element_text(colour = white),
        axis.text.x = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = white),
        plot.caption = element_text(size = 10, color = blue))

Among the annual objectives, film consumption is central and must be more or less stable to achieve them. These curves allow us to identify the periodicity of this consumption according to the weeks on a sliding year with the current month on the right. Keeping the current year’s curve above the overall curve may be a priority to complete my cinephilia and achieve my goals. This filmography is a race against time and can be optimised by segmenting the films to see. Each must-see film is scored from 0 to 100% where 100 is the highest level of recommendation. Few are above 90% and can be considered a priority. The Academy Awards can also be an indicator of “quality” but more importantly of visibility, highlighting a variety of films although this selection is heavily influenced. Despite this sectorisation, the list of films to be seen is getting longer as well as shorter, but with a constant viewing frequency of one film per day, the list should be completed.

# Four headline figures with their captions. Every figure ends up stored as
# text because the third one carries an "h" suffix.
Table_Duration <- data.frame(
  Data = c(
    # Films with a recommendation score above 0.9
    nrow(filter(NamesFilm, Reco > 0.9)),
    # Films whose source mentions "#Oscar"
    nrow(filter(NamesFilm, str_detect(Source, "#Oscar"))),
    # Cumulated duration of the unseen films, converted from minutes to hours
    paste0(round(sum(NamesFilm$Durée[!NamesFilm$Seen], na.rm = TRUE) / 60, 0), "h"),
    # Films seen whose last viewing is less than one month old
    nrow(filter(NamesFilm, Seen == TRUE, `Dernier visionnage` > (Sys.Date() %m-% months(1))))
  ),
  Caption = c(
    "movies with a recommendation higher than 90%",
    "Academy Awards nominated films on my must-see list",
    "cumulative duration of the films to be seen",
    "films seen this past month"
  ),
  stringsAsFactors = FALSE
)

# Transposed layout: one column per figure, the number on the first row and
# its caption on the second.
Table_Duration %>%
  t() %>%
  as.data.frame() %>%
  kable(escape = FALSE, align = rep("c", 10), col.names = NULL, row.names = FALSE, booktabs = TRUE) %>%
  kable_styling(full_width = TRUE) %>%
  column_spec(1:4, width = "30em") %>%
  row_spec(1, bold = TRUE, color = yellow, font_size = 30) %>%
  row_spec(2, bold = TRUE, color = white)
13 208 2139h 11
movies with a recommendation higher than 90% Academy Awards nominated films on my must-see list cumulative duration of the films to be seen films seen this past month


Directors

# Ranking of the directors with at least 3 rated films.
DirectorTop <- 40
DirectorHead <- as.data.frame(table(CritiqueFilm$Réalisateur))
DirectorHead <- DirectorHead[order(-DirectorHead$Freq), ]
DirectorHead <- DirectorHead[DirectorHead$Freq >= 3, ]
#DirectorHead <- head(DirectorList,DirectorTop)
colnames(DirectorHead) <- c("Director", "Freq")
DirectorHead <- as.data.frame(DirectorHead)
DirectorHead$Director <- as.character(DirectorHead$Director)

# Mean press grade and mean own grade per director (rounded to 1 digit),
# computed in one pass and looked up by name; this replaces a `1:nrow()`
# loop that misbehaved on an empty table.
DirectorHead$Presse <- round(as.numeric(
  tapply(CritiqueFilm$`Note Presse`, CritiqueFilm$Réalisateur,
         mean, na.rm = TRUE)[DirectorHead$Director]
), 1)
DirectorHead$OurGrades <- round(as.numeric(
  tapply(CritiqueFilm$`Nos notes`, CritiqueFilm$Réalisateur,
         mean, na.rm = TRUE)[DirectorHead$Director]
), 1)

# Distinct country flags of each director's films, concatenated. Missing
# values are dropped before pasting: the previous version pasted the first
# 11 entries and then deleted every literal "NA", which truncated long lists
# and could mangle a value that contains "NA".
DirectorHead$Countries <- vapply(
  DirectorHead$Director,
  function(director) {
    flags <- unique(
      CritiqueFilm$`Emoji Pays`[which(CritiqueFilm$Réalisateur == director)]
    )
    paste(flags[!is.na(flags)], collapse = "")
  },
  character(1),
  USE.NAMES = FALSE
)

# Diff > 0 means we rate the director higher than the press does; Total is
# the sorting key of the ranking.
DirectorHead$Diff <- DirectorHead$OurGrades - DirectorHead$Presse
DirectorHead$Total <- DirectorHead$Presse + DirectorHead$OurGrades
DirectorHead <- DirectorHead[order(-DirectorHead$Total), ]
DirectorHead$Rank <- rownames(DirectorHead) <- seq_len(nrow(DirectorHead))

DirectorHead <- select(DirectorHead, Rank, Director, Countries, Freq, OurGrades, Presse, Diff, Total)

# Scrollable HTML table of the first 50 directors; Total is shaded from
# blue to purple.
DirectorHead %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(50) %>%
  kable(escape = FALSE, align = c("l", "l", "c", "c", "c", "c", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(8, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
Rank Director Countries Freq OurGrades Presse Diff Total
1 Peter Jackson 🇺🇸 4 4.8 4.3 0.5 9.1
2 Christopher Nolan 🇺🇸 7 4.7 4.2 0.5 8.9
3 Brad Bird 🇺🇸 3 4.7 4.2 0.5 8.9
4 George Lucas 🇺🇸 4 4.8 4.1 0.7 8.9
5 Dean DeBlois 🇺🇸 3 4.6 4.3 0.3 8.9
6 Matt Reeves 🇺🇸 3 4.8 4.1 0.7 8.9
7 Matthew Vaughn 🇺🇸 5 4.8 4.0 0.8 8.8
8 Pete Docter 🇺🇸 4 4.5 4.3 0.2 8.8
9 Gore Verbinski 🇺🇸 3 4.8 4.0 0.8 8.8
10 Quentin Tarantino 🇺🇸 3 4.7 4.1 0.6 8.8
11 Sam Mendes 🇬🇧 3 4.8 4.0 0.8 8.8
12 Andrew Stanton 🇺🇸 3 4.5 4.2 0.3 8.7
13 Martin Scorsese 🇺🇸 4 4.4 4.2 0.2 8.6
14 Sam Raimi 🇺🇸 4 4.8 3.8 1.0 8.6
15 Guy Ritchie 🇺🇸 5 4.5 3.9 0.6 8.4
16 Bong Joon-ho 🇰🇷 3 4.4 4.0 0.4 8.4
17 David Yates 🇺🇸 7 4.6 3.8 0.8 8.4
18 Frères Russo 🇺🇸 4 4.3 4.1 0.2 8.4
19 J. J. Abrams 🇺🇸 4 4.6 3.7 0.9 8.3
20 Bryan Singer 🇺🇸 5 4.3 3.9 0.4 8.2
21 David Fincher 🇺🇸 4 4.2 4.0 0.2 8.2
22 John Lasseter 🇺🇸 4 4.3 3.9 0.4 8.2
23 Wes Anderson 🇺🇸 4 4.3 3.9 0.4 8.2
24 Danny Boyle 🇬🇧🇺🇸 3 4.4 3.8 0.6 8.2
25 Steven Spielberg 🇺🇸 6 4.2 3.9 0.3 8.1
26 Clint Eastwood 🇺🇸 3 4.0 4.1 -0.1 8.1
27 Jon Watts 🇺🇸 3 4.2 3.9 0.3 8.1
28 James Mangold 🇺🇸 5 4.1 3.9 0.2 8.0
29 Rian Johnson 🇺🇸 3 4.4 3.6 0.8 8.0
30 Zack Snyder 🇺🇸 7 4.3 3.6 0.7 7.9
31 Ridley Scott 🇺🇸 6 3.9 4.0 -0.1 7.9
32 George Miller 🇺🇸🇦🇺 3 4.4 3.5 0.9 7.9
33 Robert Zemeckis 🇺🇸 8 4.1 3.8 0.3 7.9
34 James Gunn 🇺🇸 4 4.1 3.8 0.3 7.9
35 Gary Trousdale et Kirk Wise 🇺🇸 3 4.1 3.8 0.3 7.9
36 Hayao Miyazaki 🇯🇵 5 3.6 4.2 -0.6 7.8
37 Francis Lawrence 🇺🇸 4 4.2 3.6 0.6 7.8
38 Ang Lee 🇺🇸 3 4.2 3.6 0.6 7.8
39 Chad Stahelski 🇺🇸 3 4.2 3.6 0.6 7.8
40 Marc Webb 🇺🇸 3 4.1 3.7 0.4 7.8
41 Tim Burton 🇺🇸 9 3.9 3.8 0.1 7.7
42 Andrew Adamson 🇺🇸 4 4.1 3.6 0.5 7.7
43 Pierre Coffin 🇺🇸 3 3.9 3.8 0.1 7.7
44 John Musker et Ron Clements 🇺🇸 5 3.7 3.9 -0.2 7.6
45 Tom McGrath 🇺🇸 5 4.1 3.5 0.6 7.6
46 Carlos Saldanha 🇺🇸 4 3.8 3.8 0.0 7.6
47 Philippe Lacheau 🇫🇷 4 3.8 3.8 0.0 7.6
48 Jennifer Yuh Nelson 🇺🇸 3 3.8 3.7 0.1 7.5
49 Taika Waititi 🇺🇸 3 4.0 3.5 0.5 7.5
50 Jon Favreau 🇺🇸 4 3.9 3.5 0.4 7.4
# Scatter plot of directors: mean personal grade (x) against number of films
# (y), coloured by whether the director is above the overall mean grade.
director_mean_grade <- mean(DirectorHead$OurGrades, na.rm = TRUE)

Director_graph <- ggplot(DirectorHead, aes(x = OurGrades, y = Freq)) +
  geom_vline(xintercept = director_mean_grade,
             col = grey) +
  geom_point(aes(size = Freq, colour = OurGrades > mean(OurGrades, na.rm = TRUE))) +
  scale_size_continuous(range = c(0.5, 5)) +
  # na.rm keeps the axis limits valid when a director has no personal grade
  xlim(min(DirectorHead$OurGrades, na.rm = TRUE), 5.5) +
  geom_smooth(col = pink, method = lm, formula = y ~ splines::bs(x, 2), se = FALSE) +
  geom_text(aes(label = paste0(Director, ": ", OurGrades), colour = OurGrades > mean(OurGrades, na.rm = TRUE)),
            hjust = -0.1,
            vjust = -0.5,
            check_overlap = TRUE) +
  # Single annotation for the mean value. A geom_text() layer with constant
  # aesthetics would draw the same label once per data row.
  annotate("text",
           x = director_mean_grade - 0.1,
           y = max(DirectorHead$Freq) + 1,
           label = round(director_mean_grade, 2),
           colour = grey,
           angle = 0,
           vjust = 1.2,
           family = "AvertaPE-Regular",
           size = 9 / .pt) +
  scale_color_manual(values = mypal(2)) +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue)) +
  # NOTE(review): the subtitle is the literal text "Sub", whereas the genre
  # plot passes the variable `Sub` -- confirm which one is intended here.
  labs(title = "Directors by volume and score",
       subtitle = "Sub",
       y = "Frequency", x = "Grade")
Director_graph

# Per-director summary of the cumulated grade: worst film (Min), best film
# (Max), rounded mean and number of films (Count). The 15 directors with the
# most films are kept, then ordered by mean grade.
AListed <- CritiqueFilm %>%
  select(Réalisateur, Notes = `Notes cummulées`) %>%
  group_by(Réalisateur) %>%
  mutate(Max = max(Notes),
         Min = min(Notes),
         Mean = round(mean(Notes), 1),
         Count = n()) %>%
  ungroup() %>% # do not leave a grouped table behind
  arrange(desc(Count)) %>%
  select(-Notes) %>%
  distinct() %>%
  head(15) %>%
  arrange(desc(Mean))

# Dumbbell chart: grey segment from worst to best film, yellow dot on the
# mean, with the number of films printed inside the dot.
ggplot(AListed, aes(y = Réalisateur)) +
  geom_segment(aes(x = Min, xend = Max, y = Réalisateur, yend = Réalisateur), color = "grey", size = .5) +
  geom_point(aes(x = Max, color = "Max"), size = 2) +
  geom_point(aes(x = Min, color = "Min"), size = 2) +
  geom_point(aes(x = Mean), color = yellow, size = 7) +
  geom_text(aes(x = Mean, label = Count), col = purple, family = "AvertaPE-Black") +
  scale_y_discrete(limits = rev(AListed$Réalisateur)) +
  # Breaks are given explicitly so that each label is attached to the right
  # key. With unnamed labels and default breaks the keys are sorted
  # ("Max", "Min") and the labels end up swapped.
  scale_color_manual(values = c("Min" = pink, "Max" = blue),
                     breaks = c("Min", "Max"),
                     labels = c("Minimum", "Maximum")) +
  labs(title = "Director ratings with range between worst and best film",
       #subtitle = "Test",
       x = "Grades", y = NULL,
       color = "Grades",
       caption = "Source : Critique Films") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))

Genre

# Genre frequencies: every film can list several genres separated by " / ".
Genre <- CritiqueFilm$Genre
Genre <- unlist(strsplit(Genre, " / "))
Genre <- as.data.frame(table(Genre))
Genre <- Genre[order(-Genre$Freq), ]
# Compress the counts (1 + count / 10) for use as word-cloud weights
Genre$Freq <- round(1 + Genre$Freq / 10, 0)


# Long table pairing each of the two genre columns with the personal grade
Genre1 <- select(CritiqueFilm, `Genre 1`, `Nos notes`)
Genre2 <- select(CritiqueFilm, `Genre 2`, `Nos notes`)
colnames(Genre2) <- colnames(Genre1) <- c("Genre", "Note")
Genre_merge <- rbind(Genre1, Genre2)
Genre_merge


# Mean grade per genre. A genre that never appears in `Genre 1` / `Genre 2`
# gets NaN. vapply also copes with an empty genre table.
Genre$Notes <- vapply(as.character(Genre$Genre), function(genre) {
  round(mean(Genre_merge$Note[which(Genre_merge$Genre == genre)], na.rm = TRUE), 1)
}, numeric(1), USE.NAMES = FALSE)


wordcloud(words = Genre$Genre, freq = Genre$Freq, min.freq = 1,
          max.words = 100, random.order = FALSE, rot.per = 0,
          colors = rev(mycols3),
          family = "AvertaPE-Black")

# Keep the leading rows of the frequency-sorted table, as many as there are
# genres with a weight above 1.
Genre <- head(Genre, sum(as.numeric(Genre$Freq > 1))) %>% as.data.frame()

genre_mean_grade <- mean(Genre$Notes, na.rm = TRUE)

# Scatter plot of genres: mean grade (x) against weight (y), coloured by
# whether the genre is above the overall mean.
Genre_graph <- ggplot(Genre, aes(x = Notes, y = Freq)) +
  # na.rm: a genre without any grade has a NaN mean, which would otherwise
  # make both axis limits NaN
  xlim(min(Genre$Notes, na.rm = TRUE), max(Genre$Notes, na.rm = TRUE) + 0.3) +
  geom_vline(xintercept = genre_mean_grade,
             col = grey) +
  geom_point(aes(colour = Notes > mean(Notes, na.rm = TRUE))) + # conditional colours
  geom_smooth(col = pink, method = lm, formula = y ~ splines::bs(x, 2), se = FALSE) +
  geom_text(aes(label = paste0(Genre, ": ", Notes), colour = Notes > mean(Notes, na.rm = TRUE)),
            hjust = -0.1,
            vjust = -0.5,
            check_overlap = TRUE) +
  # Single annotation for the mean value instead of one identical label per row
  annotate("text",
           x = genre_mean_grade - 0.05,
           y = max(Genre$Freq) + 1,
           label = round(genre_mean_grade, 2),
           colour = grey,
           angle = 0,
           vjust = 0,
           family = "AvertaPE-Regular",
           size = 9 / .pt) +
  scale_color_manual(values = mypal(2)) +
  labs(title = "Genre by volume and score",
       subtitle = Sub,
       y = "Frequency", x = "Grade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Genre_graph

# Radar chart comparing the genre mix of films graded "A" with films graded "E".

# Stack both genre columns into one (Grade, Genre) table
genre_first <- CritiqueFilm %>% select(Grade, Genre = `Genre 1`)
genre_second <- CritiqueFilm %>% select(Grade, Genre = `Genre 2`)

# Count films per grade and genre, cap the counts at 75 and rescale to [0, 1]
Genre_radar <- rbind(genre_first, genre_second) %>%
  filter(!is.na(Genre), Grade %in% c("A", "E")) %>%
  count(Grade, Genre, name = "Count") %>%
  arrange(desc(Count)) %>%
  mutate(Count = pmin(Count, 75),
         Count = BBmisc::normalize(Count, method = "range"))

# Ten genres with the largest summed (rescaled) count
Top_Genre <- Genre_radar %>%
  group_by(Genre) %>%
  summarise(Sum = sum(Count)) %>%
  arrange(desc(Sum)) %>%
  head(10) %>%
  pull(Genre)

Genre_radar <- filter(Genre_radar, Genre %in% Top_Genre)

# One row per grade, one column per genre
Skill_radar <- as.data.frame.matrix(xtabs(formula = Count ~ Grade + Genre, data = Genre_radar))

# ggradar expects the group label in the first column
Skill_radar$Grade <- row.names(Skill_radar)
rownames(Skill_radar) <- 1:nrow(Skill_radar)
Skill_radar <- Skill_radar[, c("Grade", Top_Genre)]

library(ggradar)

Skill_radar_graph <- ggradar(Skill_radar,
                             grid.label.size = 4,    # size of the grid annotations (0%, 50%, ...)
                             axis.label.size = 3.2,
                             group.point.size = 3,   # size of the points
                             group.colours = c(blue, pink)) +
  labs(title = "Genre comparison between A-Listed and E-Listed",
       caption = "Source : Critique Films") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        legend.position = c(-0.1, 0.2),
        legend.justification = "left",
        legend.text = element_text(size = 10),
        legend.key = element_rect(fill = NA, color = NA),
        legend.background = element_blank(),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))

Skill_radar_graph

Actors

# Actors sorted by number of films, most frequent first
Acteur <- Acteur[order(-Acteur$Freq), ]

actor_mean_grade <- mean(Acteur$Notes, na.rm = TRUE)

# Scatter plot of actors: mean grade (x) against number of films (y),
# coloured by whether the actor is above the overall mean.
Acteur_graph <- ggplot(Acteur, aes(x = Notes, y = Freq)) +
  geom_vline(xintercept = actor_mean_grade,
             col = grey) +
  # Single annotation for the mean value instead of one identical label per row
  annotate("text",
           x = actor_mean_grade - 0.1,
           y = max(Acteur$Freq) + 1,
           label = round(actor_mean_grade, 2),
           colour = grey,
           angle = 0,
           vjust = 1.2,
           family = "AvertaPE-Regular",
           size = 9 / .pt) +
  geom_smooth(col = pink, method = lm, formula = y ~ splines::bs(x, 3), se = FALSE) +
  geom_point(aes(size = Freq, colour = Notes > mean(Notes, na.rm = TRUE))) +
  scale_size_continuous(range = c(0.5, 5)) +
  # na.rm keeps the axis limits valid when an actor has no grade
  xlim(min(Acteur$Notes, na.rm = TRUE), 6) +
  geom_text(aes(label = paste(Acteur, Notes), colour = Notes > mean(Notes, na.rm = TRUE)),
            hjust = -0.1,
            vjust = -0.5,
            family = "AvertaPE-Regular",
            size = 9 / .pt,
            check_overlap = TRUE) +
  scale_color_manual(values = mycols2) +
  labs(title = "Actor with the best average according to their frequency",
       subtitle = "",
       y = "Frequency", x = "Grade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Acteur_graph

# Table of the 50 first actors (in the current row order of `Acteur`) with
# the flags of the countries of their films.
rownames(Acteur) <- Acteur$Rank <- seq_len(nrow(Acteur))

# Concatenate every distinct, non-missing flag of the actor's films. This
# replaces pasting eleven fixed positions and deleting the text "NA"
# afterwards, which capped the list at eleven countries.
Acteur$Countries <- vapply(as.character(Acteur$Acteur), function(actor) {
  flags <- unique(Acteur_merge$`Emoji Pays`[which(Acteur_merge$Acteur == actor)])
  paste(flags[!is.na(flags)], collapse = "")
}, character(1), USE.NAMES = FALSE)

Acteur <- select(Acteur, Rank, Acteur, Countries, Freq, Notes, Presse, Total)

# The colour tile is computed over every actor before keeping the top 50
Acteur %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(50) %>%
  kable(escape = FALSE, align = c("l", "l", "c", "c", "c", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(7, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
Rank Acteur Countries Freq Notes Presse Total
1 Hugh Jackman 🇺🇸 14 3.8 3.5 7.3
2 Robert Downey Jr.  🇺🇸 13 4.3 3.9 8.2
3 Chris Evans 🇺🇸🇰🇷 12 4.1 3.6 7.7
4 Brad Pitt 🇺🇸🇫🇷 12 3.9 3.5 7.4
5 Johnny Depp 🇺🇸 11 4.2 3.8 8.0
6 Tom Hanks 🇺🇸 11 4.1 3.9 8.0
7 Daniel Radcliffe 🇳🇿🇺🇸 11 4.1 3.8 7.9
8 Robert De Niro 🇺🇸 11 3.7 3.6 7.3
9 Ryan Reynolds 🇺🇸 11 3.6 3.5 7.1
10 Seth Rogen 🇺🇸🇬🇧 11 3.3 3.0 6.3
11 Emma Watson 🇺🇸 10 4.2 3.9 8.1
12 Chris Hemsworth 🇺🇸 10 4.2 3.6 7.8
13 Ben Stiller 🇺🇸 10 3.8 3.3 7.1
14 Leonardo DiCaprio 🇺🇸 9 4.6 4.1 8.7
15 Rupert Grint 🇺🇸 9 4.3 3.9 8.2
16 Scarlett Johansson 🇺🇸 9 3.7 3.7 7.4
17 Zac Efron 🇺🇸 9 3.3 3.1 6.4
18 Natalie Portman 🇺🇸🇫🇷 8 4.4 3.8 8.2
19 Daniel Craig 🇺🇸🇬🇧 8 4.5 3.6 8.1
20 Chris Pratt 🇺🇸 8 4.3 3.7 8.0
21 Angelina Jolie 🇺🇸 8 4.1 3.8 7.9
22 Robin Williams 🇺🇸 8 4.1 3.6 7.7
23 Jake Gyllenhaal 🇺🇸🇨🇦 8 4.0 3.5 7.5
24 Bradley Cooper 🇺🇸 8 3.7 3.6 7.3
25 Marion Cotillard 🇫🇷🇺🇸 8 3.8 3.5 7.3
26 Joseph Gordon-Levitt 🇺🇸 8 3.7 3.4 7.1
27 Will Smith 🇺🇸 8 3.6 3.3 6.9
28 Michaël Youn 🇫🇷 8 2.9 2.2 5.1
29 Ramzy Bedia 🇫🇷 8 2.2 1.9 4.1
30 Jennifer Lawrence 🇺🇸 7 4.0 3.7 7.7
31 Emma Stone 🇺🇸 7 3.7 3.8 7.5
32 Jack Black 🇺🇸 7 3.9 3.6 7.5
33 Anne Hathaway 🇺🇸 7 3.7 3.6 7.3
34 Robert Pattinson 🇺🇸 7 3.9 3.4 7.3
35 John Leguizamo 🇺🇸 7 3.3 3.6 6.9
36 Owen Wilson 🇺🇸 7 3.6 3.3 6.9
37 Kevin Hart 🇺🇸 7 3.3 3.4 6.7
38 Dwayne Johnson 🇺🇸 7 3.2 3.4 6.6
39 Jean Dujardin 🇫🇷 7 3.5 3.0 6.5
40 Kristen Stewart 🇨🇱🇺🇸 7 3.3 2.9 6.2
41 Anna Faris 🇺🇸 7 2.9 2.4 5.3
42 Ewan McGregor 🇺🇸 6 4.5 3.8 8.3
43 Kirsten Dunst 🇺🇸 6 4.4 3.8 8.2
44 Samuel L. Jackson 🇺🇸 6 4.3 3.9 8.2
45 Christian Bale 🇺🇸 6 4.1 4.0 8.1
46 Woody Harrelson 🇺🇸 6 4.2 3.9 8.1
47 Bruce Willis 🇺🇸 6 4.2 3.8 8.0
48 Chris Pine 🇬🇧🇺🇸 6 4.1 3.7 7.8
49 Tom Holland 🇺🇸 6 4.0 3.8 7.8
50 Adam Driver 🇺🇸 6 4.0 3.7 7.7
# Distribution of film grades for the ten first actors of `Acteur`.
# One row per (actor, grade) pair that occurs at least once.
AListed_Actor <- as.data.frame(table(select(Acteur_merge, Acteur, Grade)))

top_actor_names <- as.character(head(Acteur, 10)$Acteur)
AListed_Actor <- AListed_Actor[as.character(AListed_Actor$Acteur) %in% top_actor_names, ]
AListed_Actor <- AListed_Actor[AListed_Actor$Freq > 0, ]

# Total number of films per actor, repeated on each of the actor's rows
AListed_Actor$Total <- ave(AListed_Actor$Freq,
                           as.character(AListed_Actor$Acteur),
                           FUN = function(freq) sum(freq, na.rm = TRUE))
AListed_Actor <- AListed_Actor[order(-AListed_Actor$Total), ]

# Turn the grade factor into a score: level 1 -> 5, level 2 -> 4, ...
# NOTE(review): presumably the levels are the five letters A to E -- confirm.
AListed_Actor$Grade <- abs(as.numeric(AListed_Actor$Grade) - 6)


# Grey bar = mean score per actor; one point per grade obtained, sized by the
# number of films with that grade. The size is mapped inside aes() so that
# scale_size_continuous() and the legend actually apply to it.
AListed_graph <- ggplot(AListed_Actor, aes(x = Acteur, y = Grade)) +
  geom_bar(stat = "summary", fun = "mean", fill = grey, alpha = 0.2) +
  geom_point(aes(size = Freq), color = blue) +
  scale_size_continuous(range = c(0.5, 10)) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1),
                   limits = as.character(unique(AListed_Actor$Acteur))) +
  labs(title = "Actor with the best average according to their frequency",
       subtitle = "",
       y = "Grade", x = "Actor") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue)) +
  coord_flip()
AListed_graph

# Co-appearance matrix between the 50 most frequent actors.
# Cell [row, col] counts the films where `col` is billed before `row`:
# (Acteur 1 = col, Acteur 2 = row), (Acteur 1 = col, Acteur 3 = row) and
# (Acteur 2 = col, Acteur 3 = row).
Acteur <- Acteur %>% arrange(-Freq)

Actor_list <- head(Acteur$Acteur, 50) %>% as.character()
Actor_matrix <- matrix(0L,
                       nrow = length(Actor_list),
                       ncol = length(Actor_list),
                       dimnames = list(Actor_list, Actor_list))

for (col_idx in seq_along(Actor_list)) {
  # Films where the column actor is billed first / second (NA billing dropped)
  as_first <- which(CritiqueFilm$`Acteur 1` == Actor_list[col_idx])
  as_second <- which(CritiqueFilm$`Acteur 2` == Actor_list[col_idx])
  # Everyone billed after the column actor in those films
  partners <- c(as.character(CritiqueFilm$`Acteur 2`[as_first]),
                as.character(CritiqueFilm$`Acteur 3`[as_first]),
                as.character(CritiqueFilm$`Acteur 3`[as_second]))
  Actor_matrix[, col_idx] <- vapply(Actor_list, function(actor) {
    sum(partners == actor, na.rm = TRUE)
  }, integer(1), USE.NAMES = FALSE)
}

Actor_matrix <- Actor_matrix %>% as.data.frame()
Actor_matrix$from <- rownames(Actor_matrix) %>% as.character()
# Move the `from` column to the first position
Actor_matrix <- Actor_matrix[, c(ncol(Actor_matrix), 1:(ncol(Actor_matrix) - 1))]


# Transform the adjacency matrix in a long format.
# The column names are the actor names themselves, so they are kept untouched:
# replacing "." by " " would alter names that really contain a dot and
# make their `to` value differ from their `from` value.
connect <- Actor_matrix %>%
  as.data.frame() %>%
  gather(key = "to", value = "value", -1) %>%
  na.omit()

# Keep only the pairs that share at least one film
connect <- connect[connect$value > 0, ]

# Number of edges each actor takes part in (as `from` or as `to`)
coauth <- tibble(name = c(as.character(connect$from), as.character(connect$to))) %>%
  count(name)
#dim(coauth)

# First graph, only used to detect communities
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)
com <- walktrap.community(mygraph)
#max(com$membership)

# Attach the community to each actor and order the actors by community;
# the factor levels freeze that order for the plot
coauth <- coauth %>%
  mutate(grp = com$membership) %>%
  arrange(grp) %>%
  mutate(name = factor(name, name))

# One colour per community, interpolated between the four theme colours
colfunc <- colorRampPalette(c(purple, pink, blue, yellow))
scale_col <- colfunc(max(coauth$grp))

# Keep only the edges whose two ends are known vertices
connect <- connect %>%
  filter(from %in% coauth$name, to %in% coauth$name)

# Second graph, built with the reordered vertices
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)

# Arc diagram: actors on a line, arcs between actors sharing a film
arc_theme <- theme(text = element_text(size = 12),
                   panel.grid.major = element_blank(),
                   panel.grid.minor = element_blank(),
                   panel.background = element_blank(),
                   legend.position = "none")

ggraph(mygraph, layout = "linear") +
  geom_edge_arc(edge_colour = grey, fold = TRUE) +
  geom_node_point(aes(size = n, color = as.factor(grp), fill = grp)) +
  scale_color_manual(values = scale_col) +
  scale_size_continuous(range = c(0.5, 5)) +
  geom_node_text(aes(label = name), angle = 65, hjust = 1, nudge_y = -0.5, size = 3) +
  expand_limits(x = c(-1.2, 1.2), y = c(-5, 0)) +
  arc_theme

International

# Interactive world map: countries are coloured by the number of films seen
# and the hover label shows the best-ranked film of the country.
cat("
<style>
.leaflet-container {
   background: #FFF;
}
</style>
")

map <- ne_countries()

# Export the list of country names used for the matching below
write.csv(map$sovereignt, "map_Countries.csv")

# Per-country statistics, computed in a single pass.
# best_row is the index in CritiqueFilm of the film with the smallest
# `Nombre Classement` among the films of the country, or NA when the country
# has no film (or no ranked film), so no min() is taken over an empty set.
country_names <- as.character(map$sovereignt)

film_count <- vapply(country_names, function(country) {
  sum(CritiqueFilm$`Pays d'origine` == country, na.rm = TRUE)
}, integer(1), USE.NAMES = FALSE)

best_row <- vapply(country_names, function(country) {
  films <- which(CritiqueFilm$`Pays d'origine` == country)
  best <- films[which.min(CritiqueFilm$`Nombre Classement`[films])]
  if (length(best) == 0) NA_integer_ else best
}, integer(1), USE.NAMES = FALSE)

# Countries without any film are stored as NA rather than 0
film_count[film_count == 0L] <- NA
map$freq <- film_count
map$best_movie <- CritiqueFilm$`Titre du film`[best_row]
map$best_movie_rate <- CritiqueFilm$`Notes cummulées`[best_row]

# Frequency classes: 0 = no film, 1 = one film, 2 = fewer than 5,
# 3 = fewer than 100, 4 = fewer than 500, 5 = 500 or more
grade_levels <- 0:5
grade_labels <- c("0", "1", "<5", "<100", "<500", ">500")

map$Grade <- case_when(
  map$freq == 1 ~ 1,
  map$freq < 5 ~ 2,
  map$freq < 100 ~ 3,
  map$freq < 500 ~ 4,
  !is.na(map$freq) ~ 5,
  is.na(map$freq) ~ 0
)
map$Label <- grade_labels[map$Grade + 1]

# One colour per class. Bins built from 0:max would place the two highest
# classes in the same closing interval and give them the same colour.
pal <- colorFactor(palette = mycols5, domain = NULL, levels = grade_levels)

map$labels <- paste0(
  "<strong> Country: </strong> ", map$sovereignt, "<br/> ",
  "<strong> Number of movies seen : </strong> ", round(map$freq, 0), "<br/> ",
  "<strong> Best movie for this country : </strong> ", map$best_movie, " : ", map$best_movie_rate, "/10", "<br/> "
) %>%
  lapply(htmltools::HTML)

LeafMap <- leaflet(map) %>%
  setMapWidgetStyle(list(background = "white")) %>%
  setView(lng = 0, lat = 30, zoom = 1.3) %>%
  addPolygons(
    fillColor = ~ pal(Grade),
    color = purple,
    weight = 1,
    opacity = 1,
    fillOpacity = 1,
    label = ~labels,
    highlightOptions = highlightOptions(
      color = pink,
      bringToFront = TRUE,
      fill = TRUE, fillOpacity = 1
    )
  ) %>%
  # Legend keyed on the class labels rather than on the numeric class codes
  addLegend(
    colors = pal(grade_levels),
    labels = grade_labels,
    opacity = 1,
    title = "Freq"
  )
LeafMap

Sagas

# Distribution of film grades for the ten sagas with the most films.
# One row per (saga, grade) pair that occurs at least once.
AListed_Saga <- as.data.frame(table(select(CritiqueFilm, Saga, Grade)))

# Sagas by number of films; the value "Saga" is not a real saga and is dropped
Table_saga <- as.data.frame(table(CritiqueFilm$Saga))
Table_saga <- Table_saga[order(-Table_saga$Freq), ]
Table_saga <- Table_saga[Table_saga$Var1 != "Saga", ]

top_saga_names <- as.character(head(Table_saga, 10)$Var1)
AListed_Saga <- AListed_Saga[AListed_Saga$Saga %in% top_saga_names, ]
AListed_Saga <- AListed_Saga[AListed_Saga$Freq > 0, ]

# Total number of films per saga, repeated on each of the saga's rows
AListed_Saga$Total <- ave(AListed_Saga$Freq,
                          as.character(AListed_Saga$Saga),
                          FUN = function(freq) sum(freq, na.rm = TRUE))
AListed_Saga <- AListed_Saga[order(-AListed_Saga$Total), ]

# Turn the grade factor into a score: level 1 -> 5, level 2 -> 4, ...
# NOTE(review): presumably the levels are the five letters A to E -- confirm.
AListed_Saga$Grade <- abs(as.numeric(AListed_Saga$Grade) - 6)


# Grey bar = mean score per saga; one point per grade obtained, sized by the
# number of films with that grade. The size is mapped inside aes() and uses a
# continuous scale, because Freq is a count: a size given outside aes() is
# not affected by any size scale.
AListed_graph <- ggplot(AListed_Saga, aes(x = Saga, y = Grade)) +
  geom_bar(stat = "summary", fun = "mean", fill = grey, alpha = 0.2) +
  geom_point(aes(size = Freq), color = blue) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2),
                   limits = as.character(unique(AListed_Saga$Saga))) +
  ylim(0, 5) +
  scale_size_continuous(range = c(0.5, 20)) +
  labs(title = "Saga with the best average according to their frequency",
       subtitle = "",
       y = "Grade", x = "Saga") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
AListed_graph

# Saga summary table: number of films released between the two bounds (Freq),
# mean personal grade, mean press grade, mean difference and cumulated
# duration in hours.
DecadeMin_graph <- 1930
DecadeMax_graph <- 2020

Decade_Grade_Saga <- select(CritiqueFilm, Saga, Année, `Nos notes`)
# which() drops the films without a year instead of turning them into
# all-NA rows
Decade_Grade_Saga <- Decade_Grade_Saga[which(Decade_Grade_Saga$Année >= DecadeMin_graph &
                                               Decade_Grade_Saga$Année <= DecadeMax_graph), ]

Decade_Grade_Saga_table <- as.data.frame(table(Decade_Grade_Saga$Saga))
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[order(-Decade_Grade_Saga_table$Freq), ]
# The value "Saga" is not a real saga and is dropped
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[Decade_Grade_Saga_table$Var1 != "Saga", ]
colnames(Decade_Grade_Saga_table)[1] <- "Saga"

# Rows of CritiqueFilm belonging to each saga, computed once.
# The statistics below use every film of the saga in CritiqueFilm, not only
# those inside the year bounds.
saga_rows <- lapply(as.character(Decade_Grade_Saga_table$Saga), function(saga) {
  which(CritiqueFilm$Saga == saga)
})
mean_by_saga <- function(values) {
  vapply(saga_rows, function(rows) mean(values[rows], na.rm = TRUE), numeric(1))
}

Decade_Grade_Saga_table$Grade <- round(mean_by_saga(CritiqueFilm$`Nos notes`), 1)
Decade_Grade_Saga_table$Presse <- round(mean_by_saga(CritiqueFilm$`Note Presse`), 1)
Decade_Grade_Saga_table$Diff <- round(mean_by_saga(CritiqueFilm$Différence), 1)
# Durations are summed in minutes and reported in hours
Decade_Grade_Saga_table$Duration <- round(
  vapply(saga_rows, function(rows) sum(CritiqueFilm$Durée[rows], na.rm = TRUE), numeric(1)) / 60,
  1
)

rownames(Decade_Grade_Saga_table) <- seq_len(nrow(Decade_Grade_Saga_table))

knitr::kable(Decade_Grade_Saga_table) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  scroll_box(width = "100%", height = "370px")
Saga Freq Grade Presse Diff Duration
Disney 70 3.9 3.8 0.4 123.1
Marvel 46 4.1 3.6 0.5 110.2
DreamWorks 31 3.8 3.6 0.5 47.3
DC 15 3.8 3.5 0.6 42.9
Ghibli 12 3.5 3.9 0.5 22.2
Star Wars 12 4.5 3.8 0.7 26.5
Harry Potter 10 4.3 3.8 0.5 28.0
American Pie 8 2.8 2.5 0.4 12.9
James Bond 6 4.3 3.6 0.8 15.8
Jurassic Park 5 4.3 3.3 0.9 12.4
L’Âge de glace 5 3.2 3.4 0.3 8.8
Pirates des Caraïbes 5 4.6 3.8 0.8 12.1
Pokémon 5 3.3 3.0 0.5 8.4
Saw 5 3.1 2.8 0.5 8.2
Twilight 5 3.6 3.1 0.5 10.2
Astérix 4 3.9 3.3 0.6 6.5
Hunger Games 4 4.2 3.6 0.5 9.1
La Planète des Singes 4 4.7 4.0 0.7 8.1
La Terre du Milieu 4 4.9 4.3 0.6 14.0
Scary Movie 4 2.7 2.2 0.5 5.7
Transformers 4 3.4 3.1 0.5 9.4
American Nightmare 3 3.4 3.1 0.4 4.9
Ducobu 3 2.6 2.0 0.6 4.7
Fast & Furious 3 2.4 2.8 0.5 5.8
Hellboy 3 3.3 2.8 0.6 6.0
Hôtel Transylvanie 3 3.6 3.6 0.1 6.2
John Wick 3 4.2 3.6 0.5 5.9
Jumanji 3 3.9 3.5 0.3 5.8
Klapisch 3 2.9 3.7 0.8 6.1
La Nuit au Musée 3 3.6 3.0 0.6 5.2
Les Schtroumpfs 3 3.3 2.9 0.4 5.0
Lucky Luke 3 2.8 1.6 1.2 4.6
Moi, Moche et Méchant 3 3.9 3.8 0.2 4.7
Mon beau-père et moi 3 3.2 3.1 0.2 5.3
Narnia 3 3.5 3.2 0.4 6.8
Retour vers le futur 3 4.6 4.3 0.4 5.7
Sherlock Holmes 3 4.3 3.9 0.7 6.3
Star Trek 3 4.7 3.8 0.9 6.4
Tarantino 3 4.7 4.1 0.5 7.8
Very Bad Trip 3 4.0 3.4 0.6 5.0
300 2 4.0 3.6 0.4 3.6
Babysitting 2 3.9 3.8 0.1 3.0
Borat 2 2.8 2.7 0.1 2.9
Comme des bêtes 2 3.0 3.6 0.6 2.9
Comment tuer son boss? 2 3.2 2.9 0.4 3.4
Destination finale 2 3.0 2.7 0.3 2.9
Dr. Seuss 2 3.4 3.3 0.7 3.2
Happy Feet 2 4.3 3.2 1.2 3.5
Jump Street 2 3.5 3.5 0.0 3.7
Kingsman 2 4.8 4.0 0.8 6.7
Le Choc des Titans 2 4.2 2.4 1.8 3.4
Les Nouvelles Aventures 2 1.3 1.5 0.2 3.4
Matrix 2 4.2 4.0 0.3 4.6
Nos pires voisins 2 3.4 2.6 0.8 3.1
OSS 117 2 3.8 3.3 0.5 5.2
Papa ou Maman 2 3.6 3.5 0.2 3.0
Percy Jackson 2 3.3 2.7 0.6 3.8
Red 2 3.9 3.5 0.4 3.8
Rio 2 3.9 3.8 0.4 3.2
Sister Act 2 3.2 3.5 0.2 3.5
Ted 2 3.8 3.3 0.5 3.7
Zombieland 2 3.9 3.9 0.2 3.1
Agatha Christie 1 4.2 3.3 0.9 4.1
Alien 1 4.0 3.3 0.7 2.1
Asimov 1 3.7 3.7 0.0 1.7
Assassin’s Creed 1 3.6 2.9 0.7 1.9
Blade Runner 1 2.8 4.2 1.4 2.0
Breaking Bad 1 4.2 3.7 0.5 2.0
Dernier train pour Busan 1 3.9 4.1 0.2 2.0
Dragon Quest 1 4.1 3.8 0.3 1.7
Ghost in the Shell 1 3.2 3.3 0.1 1.8
Happy Birthdead 1 3.7 3.3 0.4 1.6
Hawking 1 4.2 4.3 0.1 2.0
His Dark Materials 1 4.0 2.8 1.2 1.9
Jackass 1 1.0 2.2 1.2 1.4
Kick-Ass 1 5.0 4.0 1.0 2.0
La Tour Montparnasse 1 3.4 2.4 1.0 1.5
Le Labyrinthe 1 3.8 3.8 0.0 1.9
Le Petit Nicolas 1 2.4 2.4 0.0 1.6
Lego 1 3.8 3.0 0.8 1.2
LEGO 1 3.6 4.0 0.4 1.7
Les Visiteurs 1 1.0 1.5 0.5 1.8
Limitless 1 5.0 3.9 1.1 1.8
Mad Max 1 4.6 4.2 0.4 2.0
Matt Groening 1 4.4 3.8 0.6 1.4
Men in Black 1 3.4 2.5 0.9 1.9
MonsterVerse 1 4.2 3.8 0.4 3.1
Mythologie 1 4.2 3.6 0.6 2.7
Ocean 1 3.0 3.0 0.0 1.8
Orelsan 1 3.2 3.4 0.2 1.5
Prince of Persia 1 4.4 3.3 1.1 1.9
Sans un bruit 1 4.3 3.8 0.6 3.1
Seuls 1 2.4 2.3 0.1 1.6
Sonic 1 3.4 3.3 0.1 1.6
Tintin 1 4.0 3.6 0.4 1.8
Titeuf 1 3.8 3.1 0.7 2.7
Transperceneige 1 4.6 3.5 1.1 2.1
# Heat map of the number of films per decade for the ten biggest sagas
Decade_Grade_Saga_table_top <- head(Decade_Grade_Saga_table, 10)
top_ten_sagas <- as.character(Decade_Grade_Saga_table_top$Saga)
Decade_Grade_Saga <- Decade_Grade_Saga[as.character(Decade_Grade_Saga$Saga) %in% top_ten_sagas, ]

decade_breaks <- seq(DecadeMin_graph, DecadeMax_graph, 10)
decade_subtitle <- paste0("from ", DecadeMin_graph, " to ", DecadeMax_graph)

# Bins are ten years wide on x and one saga high on y
Year_Grade_graph <- ggplot(Decade_Grade_Saga, aes(Année, Saga)) +
  geom_bin2d(binwidth = c(10, 1)) +
  scale_x_continuous(breaks = decade_breaks) +
  scale_fill_gradient(low = purple, high = blue) +
  labs(title = "Count of films per Saga and Decades",
       subtitle = decade_subtitle,
       y = "Saga", x = "Decade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Year_Grade_graph

# Keep the 15 sagas with the most films
Decade_Grade_Saga_table <- head(Decade_Grade_Saga_table, 15)

saga_mean_grade <- mean(Decade_Grade_Saga_table$Grade, na.rm = TRUE)

# Scatter plot of sagas: mean grade (x) against cumulated duration in hours
# (y), coloured by whether the saga is above the mean grade.
Saga_graph <- ggplot(Decade_Grade_Saga_table, aes(x = Grade, y = Duration)) +
  geom_vline(xintercept = saga_mean_grade,
             col = grey) +
  # Single annotation for the mean value instead of one identical label per row
  annotate("text",
           x = saga_mean_grade - 0.1,
           y = max(Decade_Grade_Saga_table$Duration) + 1,
           label = round(saga_mean_grade, 2),
           colour = grey,
           angle = 0,
           vjust = 1.2,
           family = "AvertaPE-Regular",
           size = 9 / .pt) +
  geom_point(aes(colour = Grade > mean(Grade, na.rm = TRUE))) +
  # na.rm keeps the axis limits valid when a saga has no grade
  xlim(min(Decade_Grade_Saga_table$Grade, na.rm = TRUE), 5.2) +
  geom_text(aes(label = paste(Saga, Grade), colour = Grade > mean(Grade, na.rm = TRUE)),
            hjust = -0.1,
            vjust = -0.2,
            family = "AvertaPE-Regular",
            size = 9 / .pt,
            check_overlap = TRUE) +
  scale_color_manual(values = mycols2) +
  # The title previously repeated the one of the actor plot
  labs(title = "Sagas by average grade and cumulated duration",
       subtitle = "",
       y = "Duration", x = "Grade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Saga_graph

# Average grade per film duration.
# Durations are rounded to the nearest even number of minutes so that
# neighbouring durations share a bar.
CritiqueFilm$Durée <- round(CritiqueFilm$Durée / 2, 0) * 2

# Blank out the unusually long films. The threshold is the upper whisker of
# the box plot. Taking the smallest value of boxplot.stats()$out would pick a
# *short* outlier whenever one exists and blank almost every duration.
duration_upper_whisker <- boxplot.stats(CritiqueFilm$Durée)$stats[5]
CritiqueFilm$Durée[which(CritiqueFilm$Durée > duration_upper_whisker)] <- NA

Duration_graph <- ggplot(CritiqueFilm, aes(x = Durée, y = `Nos notes`)) +
  geom_bar(stat = "summary", fun = "mean", fill = purple) +
  geom_smooth(color = pink) +
  labs(title = "Average rating per film duration",
       y = "Grade", x = "Duration (min)") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Duration_graph

# Horizontal bar chart of the mean grade difference per saga, sagas ordered
# by increasing difference.
diff_order <- order(Decade_Grade_Saga_table$Diff)
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[diff_order, ]

# The y axis follows the row order of the sorted table
saga_axis_order <- as.character(unique(Decade_Grade_Saga_table$Saga))

saga_diff_theme <- theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
                         panel.background = element_blank(),
                         legend.position = "right",
                         legend.background = element_blank(),
                         axis.line = element_line(colour = purple),
                         plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
                         plot.caption = element_text(size = 10, color = blue))

AListed_Saga_Table_graph <- ggplot(Decade_Grade_Saga_table, aes(x = Diff, y = Saga)) +
  geom_bar(stat = "summary", fun = "sum", fill = purple) +
  geom_vline(xintercept = 0, col = blue) +
  scale_y_discrete(guide = guide_axis(n.dodge = 1),
                   limits = saga_axis_order) +
  labs(title = "Difference between our Grades and Press' Grades",
       y = "Saga", x = "Difference") +
  saga_diff_theme

AListed_Saga_Table_graph

Consommation

# Number of films seen per day of the week, from the last viewing date.
Weekdays_order <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

# The day name is looked up from the numeric weekday (0 = Sunday) so that it
# is always one of the English names used as axis limits; weekdays() returns
# names in the language of the current locale.
viewing_dates <- as.Date(CritiqueFilm$`Dernier visionnage`)
Weekdays_table <- data.frame(Days = Weekdays_order[as.POSIXlt(viewing_dates)$wday + 1])

# Films without a viewing date are left out
Weekdays_table <- Weekdays_table %>% filter(!is.na(Days))

Weekdays_graph <- ggplot(Weekdays_table, aes(x = Days)) +
  geom_bar(stat = "count", fill = purple) +
  scale_x_discrete(limits = Weekdays_order) +
  labs(title = "Count of films per day of the week",
       y = "Count", x = "Days") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Weekdays_graph

# Share of films per calendar month, as a lollipop chart with a reference
# line at the uniform share (100 / 12).
# NOTE(review): the month is taken from `Date de sortie` (release date) while
# the title says "seen per month" -- confirm which one is intended.
Month_table <- as.numeric(format(as.Date(CritiqueFilm$`Date de sortie`, format = "%Y-%m-%d"), "%m"))
Month_table <- as.data.frame(table(Month_table))
Month_table$Freq <- round(Month_table$Freq * 100 / sum(Month_table$Freq), 1)

Month_graph <- ggplot(Month_table, aes(x = Month_table, y = Freq)) +
  geom_hline(yintercept = 100 / 12, col = grey) +
  geom_bar(stat = "identity", fill = grey, width = 0.01) +
  geom_point(size = 2, color = blue) +
  geom_text(aes(label = paste0(Freq, "%")),
            size = 3,
            hjust = 0.5,
            vjust = -1,
            family = "AvertaPE-Regular",
            check_overlap = TRUE) +
  # The month column is a factor whose levels are "1" ... "12": the limits of
  # a discrete scale must be given as text, not as numbers
  scale_x_discrete(limits = as.character(1:12), labels = month.abb[1:12]) +
  labs(title = "Percentage of films seen per month",
       y = "Percent", x = "Month") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Month_graph

# library(ggraph)
# library(igraph)
# library(tidyverse)
# library(viridis)
# 
# 
# Distribution <- select(CritiqueFilm,`Maison mère`,`Maison de distribution`)
# Distribution2 <- Distribution %>% count(`Maison de distribution`)
# 
# for (d in 1:nrow(Distribution2)){
#   Distribution2$`Maison mère`[d] <- Distribution$`Maison mère`[Distribution2$`Maison de distribution`[d]==Distribution$`Maison de distribution`]
# }
# Distribution2
# 
# Distribution2 <- Distribution2[Distribution2$n>=3,]
# Distribution2 <- Distribution2[Distribution2$`Maison mère`!="Autre",]
# 
# Distribution2$`Maison mère` <- str_replace_all(Distribution2$`Maison mère`," ","")
# Distribution2$`Maison de distribution` <- str_replace_all(Distribution2$`Maison de distribution`," ","")
# 
# Distribution2$name <- paste0("Distribution.",Distribution2$`Maison mère`,".",Distribution2$`Maison de distribution`)
# Distribution2$from <- paste0("Distribution.",Distribution2$`Maison mère`)
# 
# vertices <- Distribution2 %>% select(name,n,`Maison de distribution`)
# colnames(vertices) <- c("name","size","shortName")
# 
# edges <- Distribution2 %>% select(from,name)
# colnames(edges) <- c("from","to")
# 
# d1 <- data.frame(from="origin", to=paste("group", seq(1,10), sep=""))
# d2 <- data.frame(from=rep(d1$to, each=10), to=paste("subgroup", seq(1,100), sep="_"))
# hierarchy <- rbind(d1, d2)
# 
# vertices <- data.frame(name = unique(c(as.character(hierarchy$from), as.character(hierarchy$to))) ) 
# 
# vertices$id <- NA
# myleaves <- which(is.na( match(vertices$name, edges$from) ))
# nleaves <- length(myleaves)
# vertices$id[ myleaves ] <- seq(1:nleaves)
# vertices$angle <- 90 - 360 * vertices$id / nleaves
# vertices$hjust <- ifelse( vertices$angle < -90, 1, 0)
# vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
# 
# mygraph <- graph_from_data_frame( hierarchy, vertices=vertices )
# 
# ggraph(mygraph, layout = 'dendrogram', circular = TRUE) + 
#   geom_node_point(aes(filter = leaf, x = x*1.05, y=y*1.05)) +
#   geom_conn_bundle(data = get_con(from = from, to = to), alpha=0.2, colour="skyblue", width=0.9) +
#   geom_node_text(aes(x = x*1.1, y=y*1.1, filter = leaf, label=name, angle = angle, hjust=hjust), size=1.5, alpha=1) +
#   theme_void() +
#   theme(
#     legend.position="none",
#     plot.margin=unit(c(0,0,0,0),"cm"),
#   ) +
#   expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))

Conclusion

# Latest films watched ----
# Builds an HTML table of the `Top` most recently watched films.
# `Top` is a script-level variable; a later section reassigns it.
Top <- 10

# Keep only films that have a viewing date, and only the columns shown
Lastfilms <- CritiqueFilm[!is.na(CritiqueFilm$`Dernier visionnage`), ]
Lastfilms <- select(
  Lastfilms,
  `English Title`, `Date de sortie`, `Pays d'origine`,
  Réalisateur, `Notes cummulées`, `Dernier visionnage`
)
colnames(Lastfilms) <- c("Title", "Date", "Country", "Director", "Grade", "Last Visio")
Lastfilms$`Last Visio` <- as.Date(as.POSIXct(Lastfilms$`Last Visio`))
# Release date is displayed as abbreviated month + year (e.g. "May 2014")
Lastfilms$Date <- format(as.Date(Lastfilms$Date, "%m/%d/%y"), "%b %Y")

# Most recent viewing first (sorting on the numeric date avoids a helper
# column), then keep the first `Top` rows
Lastfilms <- Lastfilms[order(-as.numeric(Lastfilms$`Last Visio`)), ]
Lastfilms <- head(Lastfilms, Top)
# `%in%` never yields NA, so rows with a missing country are left untouched
Lastfilms$Country[Lastfilms$Country %in% "United States of America"] <- "USA"


# Render the table; the Grade column gets a blue-to-purple colour tile
Lastfilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(5, bold = TRUE, color = white)
Title Date Country Director Grade Last Visio
X-Men: Days of Future Past May 2014 USA Bryan Singer 9.0 2022-10-21
Mulan 2 : La Mission de l’Empereur Feb 2005 USA Lynne Southerland & Darrell Rooney 5.1 2022-10-20
Go West: A Lucky Luke Adventure Dec 2007 France Olivier Jean-Marie 6.2 2022-10-14
The Wolverine Jul 2013 USA James Mangold 7.1 2022-10-09
Chicken Little Dec 2005 USA Mark Dindal 5.0 2022-10-09
X-Men: First Class May 2011 USA Matthew Vaughn 8.7 2022-10-08
Beauty and the Beast Oct 1992 USA Gary Trousdale et Kirk Wise 8.1 2022-10-08
X-Men Origins: Wolverine Apr 2009 USA Gavin Hood 6.4 2022-10-07
Super Apr 2011 USA James Gunn 7.0 2022-10-04
Le crocodile du Botswanga Feb 2014 France Lionel Steketee et Fabrice Éboué 5.2 2022-10-04
# Films per decade, split by the `DA` flag ----
DA <- select(CritiqueFilm, `Date de sortie`, DA)

# Reduce each release date to its year, then round it to the nearest
# multiple of ten
DA$`Date de sortie` <- as.numeric(format(as.Date(DA$`Date de sortie`, format = "%Y-%m-%d"), "%Y"))
DA$`Date de sortie` <- round(DA$`Date de sortie` / 10, 0) * 10

# Cross-tabulate decade x DA flag: one row per decade, one column per flag value.
# NOTE(review): the renaming assumes exactly two flag values, the second one
# presumably meaning "animated" - confirm against the spreadsheet.
DA <- as.data.frame.matrix(table(DA))
colnames(DA) <- c("Total", "Anime")
# Keep the decade numeric: with a character x every decade becomes its own
# group and geom_area() has nothing to connect.
DA$Decades <- as.numeric(rownames(DA))
DA$Total <- DA$Total + DA$Anime

# NOTE(review): the legend labels "Movies to see" / "Movies seen" do not match
# the Total / Anime columns they are attached to - confirm the intended wording.
DA_graph <- ggplot(DA) +
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Anime, fill = "Movies seen")) +
  # geom_label() has no `check_overlap` parameter (only geom_text() does),
  # so it is not passed here
  geom_label(aes(x = Decades, y = Anime, label = paste(Anime)),
             fill = purple,
             colour = white,
             #size=3,
             hjust = 0.5,
             vjust = -2,
             family = "AvertaPE-Regular") +
  scale_fill_manual(values = c(purple, blue)) +
  labs(y = "Number of films", x = "Decade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        legend.title = element_blank(),
        legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = blue))
DA_graph

library(rvest)
library(stringr)

# Blu-ray wish-list prices (cached in ToBuy.Rda) ----
# The cached `ToBuy` data frame is reused when it is at most one week old;
# otherwise the wish list is rebuilt from CritiqueFilm and the first search
# result for each title is scraped from amazon.fr.
cache_file <- "ToBuy.Rda"
cache_is_fresh <- FALSE
if (file.exists(cache_file)) {
  load(cache_file)
  # isTRUE() treats a missing/NA date as "stale" instead of erroring in if()
  cache_is_fresh <- isTRUE(max(ToBuy$Date) >= (Sys.Date() %m-% days(7)))
}

if (!cache_is_fresh) {
  wish_titles <- CritiqueFilm$`Titre du film`[CritiqueFilm$`A acheter` %in% "A acheter"]
  wish_titles <- wish_titles[!is.na(wish_titles)]
  # Keep the second " - "-separated field of each title; titles without a
  # second field are dropped
  short_titles <- vapply(
    strsplit(wish_titles, " - "),
    function(parts) parts[2],
    character(1)
  )
  short_titles <- short_titles[!is.na(short_titles)]

  ToBuy <- data.frame(ToBuy = short_titles, stringsAsFactors = FALSE)
  # reserved = TRUE also escapes "&", "+", "?", ... which would otherwise
  # corrupt the query string
  ToBuy$Link <- paste0(
    "https://www.amazon.fr/s?k=",
    URLencode(ToBuy$ToBuy, reserved = TRUE),
    "+blu-ray"
  )

  # Preallocate the scraped columns so every row starts as NA
  ToBuy$Price <- NA_character_
  ToBuy$Name <- NA_character_
  for (b in seq_along(ToBuy$Link)) {
    # A single failed request leaves that row as NA instead of aborting
    website <- tryCatch(
      read_html(ToBuy$Link[b]),
      error = function(e) {
        warning("Could not fetch ", ToBuy$Link[b], ": ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
    if (is.null(website)) {
      next
    }
    # First match on the results page for the price and the product name
    ToBuy$Price[b] <- html_text(html_nodes(website, ".a-price-whole"))[1]
    ToBuy$Name[b] <- html_text(html_nodes(website, ".s-line-clamp-4"))[1]
  }

  # Prices use a decimal comma on the page
  ToBuy$Price <- as.numeric(str_replace(ToBuy$Price, ",", "."))
  ToBuy$Date <- Sys.Date()
  save(ToBuy, file = cache_file)
}
# `ToBuy` is now in memory either from the cache or from the refresh above,
# so it is not loaded a second time.

# Distribution of Blu-ray prices ----
# Sort by price, then keep only prices in [4, 30); missing prices are dropped
ToBuy <- ToBuy[order(ToBuy$Price), ]
ToBuy <- ToBuy[!is.na(ToBuy$Price) & ToBuy$Price >= 4 & ToBuy$Price < 30, ]

# Bucket prices to the nearest multiple of 2 and count items per bucket
ToBuy$Price_rounded <- round(ToBuy$Price / 2) * 2
Bluray <- as.data.frame(table(ToBuy$Price_rounded))
Bluray$Var1 <- as.numeric(as.character(Bluray$Var1))

# Upper y limit: at least 15, but always high enough (plus 20% headroom for
# the labels) to keep the tallest bucket. A fixed limit of 15 silently removed
# any bucket with a larger count from the plot.
bluray_ymax <- max(15, Bluray$Freq * 1.2)

Bluray_graph <- ggplot(Bluray, aes(x = Var1, y = Freq)) +
  geom_col(fill = purple, width = 0.01) +
  geom_point(size = 3, color = blue) +
  geom_text(aes(label = paste0(Freq)),
            size = 3,
            hjust = 0.5,
            vjust = -2,
            family = "AvertaPE-Regular",
            check_overlap = TRUE) +
  ylim(0, bluray_ymax) +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.line = element_line(colour = blue)) +
  labs(title = "Number of BluRay to buy",
       y = "Count", x = "Price")
Bluray_graph
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).

# Cheapest Blu-rays ----
# First (up to) ten rows of ToBuy, title and price only. head() does not pad
# with NA rows when fewer than ten items remain, and the columns are selected
# by name rather than by position.
ToBuyTop <- as.data.frame(head(ToBuy[, c("ToBuy", "Price")], 10))
rownames(ToBuyTop) <- seq_len(nrow(ToBuyTop))

# Render the table; the Price column gets a blue-to-purple colour tile
ToBuyTop %>%
  mutate(Price = color_tile(blue, purple)(Price)) %>%
  kable(escape = FALSE, align = c("l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(2, bold = TRUE, color = white)
ToBuy Price
Dragons 2 6.00
Kung Fu Panda 3 6.00
L’Âge de Glace 1 6.19
Patients 6.20
Les Nouveaux Héros 7.37
Sully 7.70
The King’s Man : Première Mission 7.81
Les Douze Travaux d’Astérix 7.99
Imitation Game 8.24
Get Out 8.38

# Grade trend over release date, per saga ----
Year_graph_DB <- select(CritiqueFilm, `Date de sortie`, `Notes cummulées`, Grade, Saga)
# which() drops rows with a missing release date instead of turning them
# into all-NA rows
Year_graph_DB <- Year_graph_DB[which(Year_graph_DB$`Date de sortie` > as.Date("1985-01-01")), ]

# Keep the first five sagas listed in Decade_Grade_Saga; every other film is
# pooled into a single "Trend" series
top_sagas <- head(unique(Decade_Grade_Saga$Saga), 5)
Year_graph_DB$Saga[!Year_graph_DB$Saga %in% top_sagas] <- "Trend"

# NOTE(review): the title says "Count of films per Year" but the plot shows
# grades (y axis "Grade") - confirm the intended title.
Year_graph <- ggplot(Year_graph_DB, aes(x = `Date de sortie`, y = `Notes cummulées` / 2)) +
  geom_point(colour = "#F2F2F2") +
  ylim(0, 5) +
  # One spline-smoothed trend line per saga (5 sagas + "Trend" = 6 colours)
  geom_smooth(aes(group = Saga, col = Saga), method = lm, formula = y ~ splines::bs(x, 4), se = FALSE) +
  scale_color_manual(values = mypal(6)) +
  labs(title = "Count of films per Year",
       subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
       y = "Grade", x = "Year") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Year_graph

# Embed the author's portrait in the document. The image is referenced by an
# absolute path, which knitr warns about and recommends replacing with a
# relative path.
# NOTE(review): size and float are presumably set in the chunk options, which
# are not visible here - confirm.
knitr::include_graphics("/Users/theotimebourgeois/Desktop/Graphisme/Théotime/PhotoCV.svg")
## Warning in knitr::include_graphics("/Users/theotimebourgeois/Desktop/Graphisme/
## Théotime/PhotoCV.svg"): It is highly recommended to use relative paths for
## images. You had absolute paths: "/Users/theotimebourgeois/Desktop/Graphisme/
## Théotime/PhotoCV.svg"

Analysis conducted by Théotime Bourgeois

Master of Science - Data Science & Organizational Behavior

by Burgundy School of Business

Instagram LinkedIn


# Grades of films that have an entry in the Oscar column ----
has_oscar_entry <- !is.na(NamesFilm$Oscar)
Oscar <- NamesFilm[has_oscar_entry, ]
from_first_year <- Oscar$Année >= YearMin_graph
Oscar <- Oscar[from_first_year, ]
# TRUE when the Oscar text contains "Oscar"; shown as "Winner" in the legend
Oscar$OscarTF <- str_detect(Oscar$Oscar, "Oscar")

oscar_flagged <- filter(Oscar, OscarTF)

oscar_theme <- theme(
  text = element_text(size = 12, family = "AvertaPE-Regular"),
  panel.background = element_blank(),
  legend.position = "right",
  legend.background = element_blank(),
  axis.line = element_line(colour = purple),
  plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
  plot.caption = element_text(size = 10, color = blue)
)

ggplot(Oscar, aes(x = Année, y = `Notes cummulées`)) +
  # Point size encodes how many films share the same year and grade
  geom_count(colour = purple) +
  # Flagged films are over-plotted in blue
  geom_point(data = oscar_flagged, colour = blue) +
  scale_size("Count", range = c(1, 6)) +
  # One spline trend per OscarTF value
  geom_smooth(
    aes(group = OscarTF, col = OscarTF),
    method = lm,
    formula = y ~ splines::bs(x, 2),
    se = FALSE
  ) +
  scale_color_manual("Winner", values = c(purple, blue)) +
  labs(
    title = "Evolution of the scores of the films presented at the Oscars",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade",
    x = "Year"
  ) +
  oscar_theme


# Circle-packing chart: distributors nested inside their parent company ----
# Number of films per distributor
distributor_counts <- as.data.frame(table(NamesFilm$`Maison de distribution`))
colnames(distributor_counts) <- c("Maison de distribution", "Freq")

# One row per distributor / parent-company pair
distributor_parents <- NamesFilm %>%
  select(`Maison de distribution`, `Maison mère`) %>%
  distinct()

# Joined on their shared column (`Maison de distribution`)
Distri_Circle <- merge(distributor_counts, distributor_parents)

# Drop distributors without a parent, the "France" and "Autre" groups, and
# distributors with 10 films or fewer
Distri_Circle <- Distri_Circle %>%
  mutate(root = "root") %>%
  filter(!is.na(`Maison mère`)) %>%
  select(
    root,
    group = `Maison mère`,
    subgroup = `Maison de distribution`,
    value = Freq
  ) %>%
  filter(group != "France", group != "Autre", value > 10)

# Leaf label carries the film count, e.g. "Name (12)"
Distri_Circle$subgroup <- paste0(Distri_Circle$subgroup, " (", Distri_Circle$value, ")")

# data.tree builds the hierarchy from a "/"-separated path
Distri_Circle$pathString <- paste("world", Distri_Circle$group, Distri_Circle$subgroup, sep = "/")
population <- as.Node(Distri_Circle)

# Default colours would be:
#circlepackeR(population, size = "value")

# Custom colour range, saved as a standalone HTML widget
p <- circlepackeR(
  population,
  size = "value",
  color_min = "hsl(240, 31%, 25%)",
  color_max = "hsl(0, 0%, 0%)"
)
saveWidget(p, file = "circles.html")
# p
# Grade trends for the first `Top` actors listed in `Acteur` ----
Top <- 5

Top_Acteur <- as.character(head(Acteur$Acteur, Top))

Acteur_merge_Top <- filter(Acteur_merge, Acteur %in% Top_Acteur)

# Earliest release year among the selected films, used in the subtitle
first_release_year <- min(as.numeric(format(Acteur_merge_Top$`Date de sortie`, "%Y")))

acteur_top_theme <- theme(
  text = element_text(size = 12, family = "AvertaPE-Regular"),
  panel.background = element_blank(),
  legend.position = "right",
  legend.background = element_blank(),
  axis.line = element_line(colour = purple),
  plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
  plot.caption = element_text(size = 10, color = blue)
)

Acteur_Top_graph <- ggplot(
  Acteur_merge_Top,
  aes(x = `Date de sortie`, y = `Nos notes`, col = Acteur)
) +
  geom_point() +
  # One spline trend line per actor
  geom_smooth(
    aes(group = Acteur),
    method = lm,
    formula = y ~ splines::bs(x, 3),
    se = FALSE
  ) +
  scale_color_manual(values = mypal(Top)) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", first_release_year, " to ", YearMax_graph),
    y = "Grade",
    x = "Year"
  ) +
  acteur_top_theme
Acteur_Top_graph

# Actors with the most films still to watch ----
# Stack the three actor columns into one long (Acteur, Seen) table
actor_seen <- rbind(
  NamesFilm %>% select(Acteur = `Acteur 1`, Seen),
  NamesFilm %>% select(Acteur = `Acteur 2`, Seen),
  NamesFilm %>% select(Acteur = `Acteur 3`, Seen)
)

# Actor x Seen contingency table (actors as row names, columns `FALSE`/`TRUE`).
# Sorted by most unseen films, ties broken by fewest seen; actors with no seen
# film are dropped and the first ten are kept.
Acteur_Proj <- actor_seen %>%
  table() %>%
  as.data.frame.matrix() %>%
  arrange(desc(`FALSE`), `TRUE`) %>%
  filter(`TRUE` != 0) %>%
  head(10)

# Move the actor names from the row names into a column and order the factor
# by total film count so the plot is sorted
Acteur_Proj <- Acteur_Proj %>%
  mutate(Acteur = rownames(Acteur_Proj)) %>%
  select(Acteur, Seen = `TRUE`, NotSeen = `FALSE`) %>%
  mutate(Total = Seen + NotSeen) %>%
  arrange(desc(Total), desc(Seen)) %>%
  mutate(Acteur = fct_reorder(Acteur, Total))

# seq_len() stays valid when no actor is left (1:0 would yield c(1, 0))
rownames(Acteur_Proj) <- seq_len(nrow(Acteur_Proj))

# Thick bar to the total, thinner bar to the seen count, both labelled.
# NOTE(review): title "Title" and subtitle "Test" look like placeholders.
ggplot(Acteur_Proj, aes(y = Acteur)) +
  geom_segment(aes(x = 0, xend = Total, yend = Acteur, col = "Not Seen"), size = 9) +
  geom_segment(aes(x = 0, xend = Seen, yend = Acteur, col = "Seen"), size = 6) +
  geom_point(aes(x = Seen, col = "Seen"), size = 5) +
  geom_point(aes(x = Total, col = "Not Seen"), size = 8) +
  scale_color_manual(values = c(purple, pink)) +
  geom_text(aes(x = Seen, label = Seen), col = white, family = "AvertaPE-Black") +
  geom_text(aes(x = Total, label = Total), col = white, family = "AvertaPE-Black") +
  labs(title = "Title",
       subtitle = "Test",
       y = NULL, x = "Count",
       col = "Movies") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))

# Reload the ratings sheet and regroup two sagas ----
CritiqueFilm <- read_excel(
  "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",
  sheet = "Notation"
)
# "Batman" films are counted under the "DC" saga
is_batman <- CritiqueFilm$Saga %in% "Batman"
CritiqueFilm$Saga[is_batman] <- "DC"
# Films distributed by DreamWorks Animation form a "DreamWorks" saga
is_dreamworks <- CritiqueFilm$`Maison de distribution` %in% "DreamWorks Animation"
CritiqueFilm$Saga[is_dreamworks] <- "DreamWorks"

# Ten-colour palette; colpastel(n) interpolates n colours across it.
# Two colours are deliberately left out of the ramp (kept commented below).
pastel <- c(
  "#9B553A", "#3F4A4D", "#728989", "#9FB9AC",
  # "#CEAF65",
  "#847359", "#6D836E", "#455C46",
  # "#F5DAA1",
  "#E59B97", "#9E6B66", "#513136"
)
colpastel <- colorRampPalette(pastel)

# Split "Meilleure film/année" at "-" into its two parts
CritiqueFilm <- CritiqueFilm %>%
  separate(
    col = `Meilleure film/année`,
    into = c("RangAnnée", "Année"),
    sep = "-"
  )

# The 100 best-ranked films, re-sorted by release date and numbered 1..n
best_ranked <- head(arrange(CritiqueFilm, Rang), 100)
Top100 <- arrange(best_ranked, `Date de sortie`)
Top100 <- mutate(Top100, ID = as.numeric(row_number()))

# World map highlighting the countries of origin of the Top 100 ----
# Country polygons come from rnaturalearth as an sf object. The earlier
# map_data('world') call was dead code: its result was overwritten at once.
world <- ne_countries(scale = "medium", returnclass = "sf")

# `color` holds the country name for countries present in the Top 100 and NA
# for all the others (drawn with the na.value colour below)
in_top100 <- world$sovereignt %in% Top100$`Pays d'origine`
world$color <- NA_character_
world$color[in_top100] <- world$sovereignt[in_top100]

world <- world[world$sovereignt != "Antarctica", ]

# One palette colour per highlighted country; counting the non-NA values
# directly does not depend on an NA being present
n_highlighted <- length(unique(na.omit(world$color)))

Map_graph <- ggplot(data = world) +
  geom_sf(aes(fill = color), color = NA) +
  scale_fill_manual(values = colpastel(n_highlighted), na.value = "#CEAF65") +
  theme(panel.background = element_rect(fill = "#E8E8DC"),
        plot.background = element_rect(fill = "#E8E8DC"),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks = element_blank(),
        text = element_blank())

# Ring of country names (legend for the map) ----
# One row per Top 100 country of origin. Piping a bare vector into
# as.data.frame() names the column ".", and later code reads `Country$.`,
# so that name is kept. Freq is a constant bar height.
Country <- Top100$`Pays d'origine` %>%
  unique() %>%
  as.data.frame() %>%
  mutate(Freq = 3)

# `%in%` never yields NA, so a missing country is left untouched
Country$.[Country$. %in% "United States of America"] <- "USA"

# NOTE(review): colpastel(7) hard-codes the number of countries - confirm it
# still matches the data when the Top 100 changes.
Country_graph <- ggplot(Country, aes(., Freq)) +
  geom_col(aes(fill = .), position = 'stack', width = 1) +
  scale_fill_manual(values = colpastel(7)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "black"),
        axis.title.x = element_blank(),
        panel.grid.major = element_blank()) +
  # Negative lower limit leaves an empty disc in the middle of the ring
  scale_y_continuous(limits = c(-20, max(Country$Freq))) +
  coord_curvedpolar()

# `filename` is spelled out instead of relying on partial matching of `file`.
# NOTE(review): "MCountry_graph.png" may be a typo for "Country_graph.png";
# left unchanged in case other files reference it.
ggsave(filename = "Map_graph.svg", plot = Map_graph, width = 10, height = 8)
ggsave(filename = "MCountry_graph.png", plot = Country_graph, width = 10, height = 8)

# Country mix of grade-A films for the year of each Top 100 film ----
# Grade-A films from the Top 100 countries ("USA" was renamed in `Country`,
# so the original spelling is added back for matching)
Année <- CritiqueFilm %>%
  select(Année, `Pays d'origine`, Grade) %>%
  filter(Grade == "A", `Pays d'origine` %in% (c(Country$., "United States of America"))) %>%
  arrange(Année)

# For each Top 100 film (in ID order), take the grade-A films of the same year
# and tag them with that film's position. Building the pieces with lapply() and
# binding once avoids growing a data frame with rbind() in a loop, and
# seq_len() is safe when Top100 has fewer than two rows.
AnnéeTable <- lapply(seq_len(nrow(Top100)), function(position) {
  Année %>%
    filter(Année == Top100$Année[position]) %>%
    mutate(ID = position)
}) %>%
  bind_rows()

# One 100%-stacked bar per Top 100 position, wrapped into a ring
AnnéeGraph <- ggplot(AnnéeTable) +
  geom_bar(aes(x = ID, fill = `Pays d'origine`), position = "fill") +
  scale_fill_manual(values = colpastel(7)) +
  coord_polar() +
  scale_x_continuous(limits = c(-4, 104)) +
  # Negative lower limit leaves an empty disc in the middle of the ring
  scale_y_continuous(limits = c(-7, 1)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.major = element_blank())

ggsave(filename = "AnnéeGraph.svg", plot = AnnéeGraph, width = 10, height = 8)

# Which rating categories scored 5 for each Top 100 film ----
# The five rating columns, in the order that defines CategoryID (1..5)
score_cols <- c(
  "Scénario",
  "Acteurs / Personnages",
  "Ambiance / Concept",
  "Aspect Visuel",
  "Aspect Sonore"
)

# Long format: one row per film x category, numbered by category, keeping only
# the categories graded 5. Columns are selected by name rather than position,
# and match() replaces a merge against a lookup table.
Category <- Top100 %>%
  select(`English Title`, ID, all_of(score_cols)) %>%
  pivot_longer(cols = all_of(score_cols), names_to = "Category", values_to = "Grade") %>%
  mutate(CategoryID = match(Category, score_cols)) %>%
  filter(Grade == 5)

# One tile per film (angle) and category (radius), wrapped into a ring
Category_graph <- ggplot() +
  geom_bin2d(data = Category, aes(x = ID, y = CategoryID, fill = Category), binwidth = c(1, 1)) +
  coord_polar() +
  xlim(c(-4, 104)) +
  ylim(c(-20, 6)) +
  scale_fill_manual(values = colpastel(5)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.major = element_blank())

ggsave(filename = "Category_graph.svg", plot = Category_graph, width = 10, height = 8)

# Ring of decades for the Top 100 ----
# One row per Top 100 film, sorted by decade. Within each decade `label` is
# the running position and `Count` the number of films. A grouped mutate()
# replaces a summarise() that returned several rows per group; the explicit
# arrange() reproduces summarise()'s sorting by group.
Décénie <- Top100 %>%
  select(Décénie) %>%
  mutate(Décénie = as.character(Décénie)) %>%
  arrange(Décénie) %>%
  group_by(Décénie) %>%
  mutate(label = row_number(),
         Count = n()) %>%
  ungroup() %>%
  mutate(Freq = 1,
         ID = as.numeric(row_number()),
         # Only the first film of a decade carries the decade text, and only
         # when the decade has at least 3 films; every other row is NA
         label = ifelse(Count >= 3 & label == 1, Décénie, NA))

# NOTE(review): colpastel(7) hard-codes the number of decades - confirm.
Décéniegraph <- ggplot(data = Décénie, aes(ID, Freq)) +
  # width > 1 makes neighbouring bars overlap slightly
  geom_col(aes(fill = Décénie), position = 'stack', width = 1.1) +
  scale_fill_manual(values = colpastel(7)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  xlim(c(-4, 104)) +
  # Negative lower limit leaves an empty disc in the middle of the ring
  scale_y_continuous(limits = c(-15, max(Décénie$Freq))) +
  geom_textpath(aes(x = ID, y = Freq, label = label), vjust = -0.8, hjust = 1, color = "white", size = 3, inherit.aes = FALSE) +
  coord_polar()

ggsave(filename = "Décéniegraph.svg", plot = Décéniegraph, width = 10, height = 8)
## Warning: position_stack requires non-overlapping x intervals
## Warning: Removed 95 rows containing missing values (geom_textpath).
# Ring of film titles for the Top 100 ----
# Text rotation for each film from its position on the ring.
# NOTE(review): 77 and 333 look like hand-tuned start angle and sweep - confirm.
angle <- 77 - 333 * (Top100$ID) / 100
# Labels that would end up upside-down are rotated by 180 degrees and
# right-aligned, with the year written after the title instead of before it
is_flipped <- angle < -90
Top100$hjust <- ifelse(is_flipped, 1, 0)
Top100$angle <- ifelse(is_flipped, angle + 180, angle)
Top100$label <- ifelse(
  is_flipped,
  paste0(Top100$`English Title`, " -", Top100$Année),
  paste0(Top100$Année, "- ", Top100$`English Title`)
)

Top100$Décénie <- Top100$Décénie %>% as.character()

# Outer radius of the ring; also read by later plots
y <- 6

# NOTE(review): colpastel(7) hard-codes the number of decades - confirm.
Titlegraph <- ggplot(Top100, aes(x = ID, y = y)) +
  # Faint decade-coloured wedges behind the titles
  geom_col(aes(fill = Décénie), position = 'stack', width = 1.1, alpha = .3) +
  geom_text(aes(label = label, y = 0.2, hjust = hjust, angle = angle), size = 1) +
  xlim(c(-4, 104)) +
  scale_y_continuous(limits = c(-10, y)) +
  scale_fill_manual(values = colpastel(7)) +
  coord_polar() +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank())

# `filename` is spelled out instead of relying on partial matching of `file`
ggsave(filename = "Titlegraph.svg", plot = Titlegraph, width = 10, height = 8)
## Warning: position_stack requires non-overlapping x intervals
# Ring of director names for the Top 100 ----
# The (up to) ten most frequent directors, each tagged with a palette colour
Réal <- Top100$Réalisateur %>%
  table() %>%
  as.data.frame() %>%
  arrange(desc(Freq)) %>%
  head(10) %>%
  `colnames<-`(c("Réalisateur", "Freq"))
# Sized from the data: a fixed colpastel(10) fails when fewer than ten
# distinct directors exist
Réal$Col <- colpastel(nrow(Réal))
# Full join: films by other directors get NA in Freq and Col
Top100 <- merge(Top100, Réal, by = "Réalisateur", all = TRUE)

# NOTE(review): `Col` is mapped as a discrete variable and then recoloured by
# scale_color_manual(), so the hex codes stored in `Col` are not the colours
# actually drawn - confirm whether scale_color_identity() was intended.
Réalgraph <- ggplot(Top100, aes(x = ID, y = y)) +
  geom_text(aes(label = Réalisateur, y = 0.2, hjust = hjust, angle = angle, col = Col), size = 1) +
  xlim(c(-4, 104)) +
  scale_y_continuous(limits = c(-10, y)) +
  scale_color_manual(values = colpastel(11)) +
  coord_polar() +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank())

# `filename` is spelled out instead of relying on partial matching of `file`
ggsave(filename = "Réalgraph.svg", plot = Réalgraph, width = 10, height = 8)


# Top100 <- merge(Top100,
#       world %>% as.data.frame() %>% select(sovereignt,iso_a2) %>% unique() %>% `colnames<-`(c("Pays d'origine","iso")),
#       by="Pays d'origine")