library(readxl)
library(tibble)
library(ggplot2)
library(tidyverse)
library(tm)
library(showtext)
library(lubridate)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(officer)
library(dplyr)
library(showtext)
library(tidyr)
library(knitr)
library(kableExtra)
library(cowplot)
library(colorspace)
library(ggrepel)
library(sf)
#library(tmap) # for static and interactive maps
library(leaflet) # for interactive maps
#library(spData)
library(rnaturalearth)
library(leaflet.extras)
library(sp)
library(wbstats)
library(formattable)
library(rvest)
library(XML)
library(BBmisc)
library(xml2)
library(fmsb)
library(colormap)
library(circlize)
library(networkD3)
library(influential) #to create Sankey Diagram
library(igraph) #to create Sankey Diagram
library(oce) #to create Sankey Diagram
library(ggraph) #to create Sankey Diagram
library(devtools) #to add some external libraries
library(addTextLabels)
library(openxlsx)
library(data.tree) #to create a hierarchy
library(htmlwidgets) #to save interative graphs
library(circlepackeR) #to plot circles
library(geomtextpath)
#devtools::install_github("jeromefroe/circlepackeR") # If needed
# Register the Averta PE fonts with showtext/sysfonts.
# font_paths() takes search directories, not font files, so the user's font
# folder is added once and the files are then looked up by file name.
font_paths("/Users/theotimebourgeois/Library/Fonts")
# Family names with the ".otf" suffix are kept as originally registered so
# that any existing reference to them still resolves.
font_add(family = "AvertaPE-Black.otf",
         regular = "AvertaPE-Black.otf")
font_add(family = "AvertaPE-Regular.otf",
         regular = "AvertaPE-Regular.otf")
# The plots in this report request family = "AvertaPE-Black" and
# family = "AvertaPE-Regular" (no suffix), so the same files are also
# registered under those names.
font_add(family = "AvertaPE-Black",
         regular = "AvertaPE-Black.otf")
font_add(family = "AvertaPE-Regular",
         regular = "AvertaPE-Regular.otf")
# Colour constants (hex codes) used throughout the report.
purple <- "#00051E" # alternative kept for reference: "#2C2C54"
pink <- "#A40E4C"
blue <- "#2E86AB"
yellow <- "#FF9C00"
lila <- "#E3DFFF"
brown <- "#C3979F"
grey <- "#BFBFBF"
white <- "#FFFFFF"

# Fixed colour combinations reused by the plots.
mycols2 <- c(blue, purple)
mycols3 <- c(purple, pink, blue)
mycols4 <- c(purple, pink, blue, blue, yellow)
mycols5 <- c(white, blue, purple)
allcols <- c(purple, blue, pink, yellow, lila, brown, grey)

#' Return the first `nbcol` colours of the report palette.
#'
#' @param nbcol Number of colours wanted.
#' @return Character vector with the first `nbcol` entries of `allcols`.
#'   Asking for more colours than `allcols` holds pads the result with `NA`.
mypal <- function(nbcol) {
  # seq_len() rather than 1:nbcol: 1:0 is c(1, 0), which would return one
  # colour when zero colours are requested.
  allcols[seq_len(nbcol)]
}
Le cinéma ne dit pas autrement les choses, il dit autre chose.
The cinema does not say things differently, it says something else.
Éric Rohmer, French Director
First of all, the following analysis is purely subjective and is in
no way representative of global consumption behaviour.
It is,
however, representative of my own cinema consumption since I was 20 years
old. The data has been meticulously collected to produce this
snapshot of my cinephilia at a given moment, with
the biases that this implies: I am a young Frenchman who has been more
or less influenced in his choice of films and who obviously has tastes
that cannot be explained but that can be identified.
# Both sheets come from the same workbook.
film_workbook <- "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx"

# Films still to be seen.
ToSeeFilm <- read_excel(film_workbook, sheet = "Film à voir")

# Cleaning: rated films, with two saga labels normalised
# ("Batman" becomes "DC"; DreamWorks Animation releases become "DreamWorks").
CritiqueFilm <- read_excel(film_workbook, sheet = "Notation") %>%
  mutate(
    Saga = replace(Saga, which(Saga == "Batman"), "DC"),
    Saga = replace(
      Saga,
      which(`Maison de distribution` == "DreamWorks Animation"),
      "DreamWorks"
    )
  )
# GlobalInfos <- read_excel("/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",sheet = "Bilan")

# Merge of the two databases, keeping only rows that have a title.
# NamesFilm$Année <- as.numeric(format(NamesFilm$`Date de sortie`, format = "%Y"))
# NamesFilm$Décénie <- round(NamesFilm$Année/10,0)*10
NamesFilm <- bind_rows(CritiqueFilm, ToSeeFilm) %>%
  filter(!is.na(`Titre du film`)) %>%
  mutate(
    # A film counts as seen as soon as its scenario has been graded.
    Seen = !is.na(Scénario),
    # Without a cumulated grade, fall back on twice the press grade.
    `Notes cummulées` = ifelse(
      is.na(`Notes cummulées`),
      `Note Presse` * 2,
      `Notes cummulées`
    ),
    # Release month as a number.
    Mois = as.numeric(format(`Date de sortie`, "%m"))
  )

# Non-French films that still lack an IMDB identifier.
IMDB <- NamesFilm %>%
  filter(is.na(`IMDB ID`) & `Pays d'origine` != "France") %>%
  select(`English Title`, Année, Réalisateur, `IMDB ID`)
# Number of rated films (one row per film in CritiqueFilm)
count_movies_seen <- nrow(CritiqueFilm)
# Number of films still to see (one row per film in ToSeeFilm)
count_movies_tosee <- nrow(ToSeeFilm)
# Total number of films, rated or not
count_total <- sum(count_movies_seen, count_movies_tosee)
# Best director according to our grades (minimum 3 films)
# Best director according to the press (minimum 3 films)
# Director we rate highest relative to the press (minimum 3 films)
Director_table <- as.data.frame(table(CritiqueFilm$Réalisateur))
colnames(Director_table)[1] <- "Director"
Director_table <- Director_table[Director_table$Freq >= 3, ]

# Mean of `grades` over each retained director's films. Vectorised with
# vapply() so that an empty table yields empty columns instead of the error
# a 1:length() loop would raise.
director_mean <- function(grades) {
  vapply(
    as.character(Director_table$Director),
    function(director) {
      mean(grades[which(CritiqueFilm$Réalisateur == director)], na.rm = TRUE)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
Director_table$`Note Presse` <- director_mean(CritiqueFilm$`Note Presse`)
Director_table$`Nos notes` <- director_mean(CritiqueFilm$`Nos notes`)
# Positive when our grade is above the press grade.
Director_table$Surcote <- Director_table$`Nos notes` - Director_table$`Note Presse`

# First director reaching the maximum of `score`. which.max() skips NA/NaN
# (a director whose grades are all missing), where `== max(...)` would return
# NA; the trailing [1] keeps the result a single value (NA when empty).
top_director <- function(score) {
  as.character(Director_table$Director)[which.max(score)][1]
}
Best_director_forme <- top_director(Director_table$`Nos notes`)
Best_director_forpresse <- top_director(Director_table$`Note Presse`)
Surcote_director <- top_director(Director_table$Surcote)
# Best actor (minimum 3 films)
# Second best actor (minimum 3 films)
# Third best actor (minimum 3 films)
# Most prolific actor

# Stack the three actor columns into one long table: one row per
# (film, credited actor), carrying the film's grades along.
actor_columns <- c("Acteur 1", "Acteur 2", "Acteur 3")
Acteur_merge <- do.call(
  rbind,
  lapply(actor_columns, function(actor_column) {
    credits <- select(
      CritiqueFilm,
      all_of(actor_column), `Nos notes`, Grade, `Note Presse`,
      `Emoji Pays`, `Date de sortie`
    )
    colnames(credits)[1] <- "Acteur"
    credits
  })
)

# Number of rated films per actor, most frequent first.
Acteur <- as.data.frame(table(Acteur_merge$Acteur))
colnames(Acteur) <- c("Acteur", "Freq")
Acteur <- Acteur[order(-Acteur$Freq), ]
Acteur_Max <- as.character(Acteur$Acteur)[1]

# "Minimum 3 films" means Freq >= 3 (the original `> 3` required four films
# and disagreed with both the comments above and the directors' threshold).
Acteur <- Acteur[Acteur$Freq >= 3, ]

# Mean grade over an actor's films, rounded to one decimal.
actor_mean <- function(grades) {
  vapply(
    as.character(Acteur$Acteur),
    function(actor) {
      round(mean(grades[which(Acteur_merge$Acteur == actor)], na.rm = TRUE), 1)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
Acteur$Notes <- actor_mean(Acteur_merge$`Nos notes`)
Acteur$Presse <- actor_mean(Acteur_merge$`Note Presse`)
Acteur$Total <- Acteur$Notes + Acteur$Presse

# Rank by combined grade; the podium is padded with NA if fewer than three
# actors qualify.
Acteur <- Acteur[order(-Acteur$Total), ]
Best_actor <- as.character(Acteur$Acteur[1:3])
# Best year according to the rated films
Year_data <- as.data.frame(table(CritiqueFilm$Année))
colnames(Year_data)[1] <- "Year"
# Mean cumulated grade of the films of each year (NaN when none is graded).
Year_data$Note <- vapply(
  as.character(Year_data$Year),
  function(year) {
    mean(
      CritiqueFilm$`Notes cummulées`[which(CritiqueFilm$Année == year)],
      na.rm = TRUE
    )
  },
  numeric(1),
  USE.NAMES = FALSE
)
# Only years with at least 5 rated films compete.
Year_data <- Year_data[Year_data$Freq >= 5, ]
# Years tied for the best note are all returned. Years without a usable note
# are dropped first so they cannot add NA entries, and the -Inf floor keeps
# max() quiet when nothing qualifies (Best_year is then empty).
graded_years <- Year_data[!is.na(Year_data$Note), ]
Best_year <- as.character(
  graded_years$Year[graded_years$Note == max(graded_years$Note, -Inf)]
)
# Best month to go and see a film at the cinema in France
# Use the Mois column when CritiqueFilm has one; otherwise derive the month
# from the release date.
film_month <- if ("Mois" %in% colnames(CritiqueFilm)) {
  CritiqueFilm$Mois
} else {
  as.numeric(format(CritiqueFilm$`Date de sortie`, "%m"))
}
Month <- as.data.frame(table(film_month))
# Name the columns explicitly: tabulating a one-column data frame names the
# first column after that column ("Mois"), so `Month$Var1` was NULL.
colnames(Month) <- c("Var1", "Freq")
# Mean of our grades for the films released in each month.
Month$Grade <- vapply(
  as.character(Month$Var1),
  function(month) {
    mean(CritiqueFilm$`Nos notes`[which(film_month == month)], na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)
# Go through as.character(): as.numeric() on a factor returns level codes,
# which only match the month number when all twelve months are present.
best_rows <- which(Month$Grade == max(Month$Grade, -Inf, na.rm = TRUE))
Best_Month <- month.name[as.numeric(as.character(Month$Var1[best_rows]))]
# Best distribution company according to our grades
Distri <- select(CritiqueFilm, `Maison de distribution`, `Nos notes`)
Distri_table <- as.data.frame(table(Distri$`Maison de distribution`))
# Only distributors with more than 3 rated films compete.
Distri_table <- Distri_table[Distri_table$Freq > 3, ]
# Mean of our grades per distributor; vapply() copes with an empty table,
# where a 1:length() loop would error.
Distri_table$Note <- vapply(
  as.character(Distri_table$Var1),
  function(distributor) {
    mean(
      Distri$`Nos notes`[which(Distri$`Maison de distribution` == distributor)],
      na.rm = TRUE
    )
  },
  numeric(1),
  USE.NAMES = FALSE
)
# All distributors tied for the best mean; which() drops rows whose mean is
# NaN and the -Inf floor keeps max() quiet when nothing qualifies.
best_rows <- which(
  Distri_table$Note == max(Distri_table$Note, -Inf, na.rm = TRUE)
)
Best_distri <- as.character(Distri_table$Var1[best_rows])
# Parent company with the largest share of the rated films
Distri <- CritiqueFilm %>% select(`Maison mère`, `Nos notes`)
parent_counts <- as.data.frame(table(Distri$`Maison mère`))
# The "France" entry is left out of the ranking.
parent_counts <- parent_counts[!(parent_counts$Var1 %in% "France"), ]
Distri_table <- parent_counts[order(-parent_counts$Freq), ]
Most_Distri <- as.character(Distri_table$Var1[1])
# Share of all rated films released by that parent company, in percent.
released_by_top_parent <- CritiqueFilm$`Maison mère` == Most_Distri
Most_Distri_Percent <- round(
  100 * sum(released_by_top_parent, na.rm = TRUE) / count_movies_seen,
  1
)
# Format a duration given in minutes as "<hours>h<minutes>".
# Rounding is done on the total number of minutes and the minutes are
# zero-padded, so 65 gives "1h05" (not "1h5") and 119.7 gives "2h00"
# (not "1h60").
format_hours_minutes <- function(minutes) {
  total_minutes <- round(minutes)
  sprintf(
    "%dh%02d",
    as.integer(total_minutes %/% 60),
    as.integer(total_minutes %% 60)
  )
}

# Share of the whole list (seen + to see) already seen, in percent.
Total_percent <- round(
  100 * count_movies_seen / (count_movies_tosee + count_movies_seen),
  1
)

# Mean film duration (the "h"/60 conversion below treats it as minutes).
Duration <- mean(CritiqueFilm$Durée, na.rm = TRUE)
Duration_txt <- format_hours_minutes(Duration)

# Days elapsed since the tracking start date.
Beginning <- as.Date("2019-09-12")
Count_days <- as.numeric(Sys.Date() - Beginning)

# Average viewing time per day since the start date.
Duration_seen <- Duration * count_movies_seen / Count_days
Duration_txt_seen <- format_hours_minutes(Duration_seen)
Films_per_day <- round(count_movies_seen / Count_days, 2)

# Rated films released in 2018 or 2019, divided by 360.
# NOTE(review): presumably an estimate of new films added per day - confirm
# the intended period and divisor.
Filmtoaddparday <- sum(
  CritiqueFilm$Année == 2018 | CritiqueFilm$Année == 2019,
  na.rm = TRUE
) / 360

# Days needed to clear the to-see list at the current pace, without
# (Nb_day) and with (Nb_day2) the extra films expected meanwhile; both are
# then turned into calendar dates counted from today.
Nb_day <- count_movies_tosee * Duration / Duration_seen
Nb_day2 <- round(Nb_day + Nb_day * Filmtoaddparday, 0)
Nb_day <- Sys.Date() + Nb_day
Nb_day2 <- Sys.Date() + Nb_day2

# Subtitle reused by the plots.
Sub <- paste0("Based on ", count_movies_seen, " movies seen")
As a lifelong film enthusiast, I created a database in
September 2019 (1139 days ago) allowing me to track the
films I watch and to structure my cinephilia.
So I have
seen 856 films in the last few years and I have a list of over 1328
films to see. This analysis is therefore evolving!
Who
are my favourite directors? What are the best films according to me and
according to the press? What kind of films are the most represented?
Which actor is the most present in my filmography? All these questions
will be answered in this report! I will start by giving you an overview
of my film consumption and then go into more detail in the dedicated
sections.
The recipe for a good film? Still unknown but if I had to
summarize the 856 films I have seen, this is what I can say:
In order to establish a ranking of films, actors, directors… I
had to decide on some rating criteria that will allow me to evaluate the
main elements that make up a film. So here are the 5 criteria I rate out
of 5:
# Pool of example films for the explanation of the grading criteria:
# grade "A", press grade of at least 4, Saga equal to "Saga", an English
# title, and not French. filter() treats NA comparisons as FALSE, so no
# all-NA rows enter the pool.
Explication_Sample <- CritiqueFilm %>%
  filter(
    Grade == "A",
    `Note Presse` >= 4,
    Saga == "Saga",
    !is.na(`English Title`),
    `Pays d'origine` != "France"
  ) %>%
  select(
    `English Title`, Scénario, `Acteurs / Personnages`, `Ambiance / Concept`,
    `Aspect Visuel`, `Aspect Sonore`
  ) %>%
  as.data.frame()

# Join titles as "A, B or C"; shorter inputs give "A or B", "A" or "".
join_examples <- function(titles) {
  if (length(titles) < 2) {
    return(paste(titles, collapse = ""))
  }
  paste0(paste(head(titles, -1), collapse = ", "), " or ", tail(titles, 1))
}

# For each criterion, draw up to three films graded 5 on it, remove them from
# the pool so that no film illustrates two criteria, then format the titles.
# The removal must happen BEFORE the titles are pasted together: the
# original pasted the scenario titles first, so its %in% test compared
# against one long string and never removed anything.
criteria <- c(
  Scenario = "Scénario",
  Acteur = "Acteurs / Personnages",
  Ambiance = "Ambiance / Concept",
  Visuel = "Aspect Visuel",
  Sonore = "Aspect Sonore"
)
example_text <- list()
for (criterion in names(criteria)) {
  top_rated <- which(Explication_Sample[[criteria[[criterion]]]] == 5)
  candidates <- Explication_Sample$`English Title`[top_rated]
  # Never ask sample() for more titles than are available.
  picked <- sample(candidates, min(3, length(candidates)))
  Explication_Sample <- Explication_Sample[
    !(Explication_Sample$`English Title` %in% picked),
  ]
  example_text[[criterion]] <- join_examples(picked)
}
Explication_Sample_Scenario <- example_text$Scenario
Explication_Sample_Acteur <- example_text$Acteur
Explication_Sample_Ambiance <- example_text$Ambiance
Explication_Sample_Visuel <- example_text$Visuel
Explication_Sample_Sonore <- example_text$Sonore
# Average grade for each of the five grading criteria, in display order.
category_labels <- c(
  "Scenario", "Actors and characters", "Atmosphere and concept",
  "Visual aspect", "Sound aspect"
)
category_columns <- c(
  "Scénario", "Acteurs / Personnages", "Ambiance / Concept",
  "Aspect Visuel", "Aspect Sonore"
)
category_means <- vapply(
  category_columns,
  function(column) mean(CritiqueFilm[[column]], na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)
scoring_system <- data.frame(
  Categories = category_labels,
  Grade = round(category_means, 2),
  ID = 1:5,
  stringsAsFactors = FALSE
)

# Lollipop chart: one stem per criterion starting at 3, a grey line at the
# mean of the five averages, and the value printed above each point.
scoring_system_graph <- ggplot(scoring_system, aes(x = Categories, y = Grade)) +
  geom_hline(yintercept = mean(scoring_system$Grade), col = grey) +
  geom_segment(aes(x = ID, xend = ID, y = 3, yend = Grade), col = blue) +
  geom_point(size = 3, color = purple, fill = "white", shape = 21, stroke = 2) +
  geom_text(
    aes(label = Grade),
    size = 3,
    hjust = 0.5,
    vjust = -1.5,
    family = "AvertaPE-Regular",
    check_overlap = TRUE
  ) +
  scale_x_discrete(
    guide = guide_axis(n.dodge = 2),
    limits = category_labels
  ) +
  ylim(3, 4.5) +
  labs(title = "Average of grades per Categorie") +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "bottom",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
scoring_system_graph
The scenario: Essential for a good film, it
keeps us on the edge of our seats, makes us passionate, questions us and
is in my opinion the most important. A film with an impeccable visual
quality without a script will remain a bad film. Here are for example 3
films that I evaluated with an excellent script: 1917, Shutter
Island or Forrest Gump
Actors and characters: This category is an
indissociable part of the rating system and allows us to identify
whether the casting is successful and therefore whether the actors are
good and correspond perfectly to the character they play. This category
is obviously rated higher than the others since the actors contribute
most to the credibility of a film and most of the time give their best
as in: Gone Girl, Interstellar or Hachi: A Dog’s Tale
Atmosphere and concept: Each film has its own
universe that can transport us and sometimes we want to see more… or
not! The atmosphere of the film allows us to stay hooked to the plot and
to feel unique emotions. The concept allows innovation in an environment
that we think is already saturated but we will see that many recent
films have really new concepts like: The Grand Budapest Hotel,
The Green Mile or The Prestige
Visual aspect: The aesthetics of the film is a
central element. The visual aspect consists in evaluating the visual
beauty of the film, its risk-taking, its camera movements, its editing,
its special effects, its photography etc. Here are some films with an
interesting visual aspect: Ready Player One, The Curious Case of
Benjamin Button or Your Name.
Sound aspect: Finally, the sound aspect echoes
the atmosphere of the film as it includes both the soundtrack and all
the work done on sound, sound effects etc. to make it all coherent.
Although the soundtrack has a central place in the evaluation of this
criterion, some films enjoy quite incredible sound effects that
sometimes absorb the musical theme. Here are 3 films with impeccable
sound effects: Titanic, The Imitation Game or Whiplash
# Top films among those graded by "Théotime", best cumulated grade first.
# filter() drops rows whose `Noté par` is NA; bracket subsetting turned them
# into all-NA rows.
TopFilms <- CritiqueFilm %>%
  filter(str_detect(`Noté par`, "Théotime")) %>%
  select(`English Title`, Année, `Emoji Pays`, Réalisateur, `Notes cummulées`)
TopFilms <- TopFilms[order(-TopFilms$`Notes cummulées`), ]
colnames(TopFilms) <- c("Title", "Year", "Country", "Director", "Grade")

# Keep at most `Top` films.
Top <- 100
TopFilms <- head(TopFilms, Top)
TopFilms$Country[which(TopFilms$Country == "United States of America")] <- "USA"
# Rank follows the number of rows actually kept: `1:Top` fails as soon as
# fewer than `Top` films are available.
TopFilms$Rank <- seq_len(nrow(TopFilms))
TopFilms <- TopFilms %>% select(Rank, Title, Year, Country, Director, Grade)

# Scrollable HTML table with the grade shown as a colour tile.
TopFilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("c", "l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(6, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
| Rank | Title | Year | Country | Director | Grade |
|---|---|---|---|---|---|
| 1 | Forrest Gump | 1994 | 🇺🇸 | Robert Zemeckis | 9.6 |
| 2 | The Dark Knight | 2008 | 🇺🇸 | Christopher Nolan | 9.5 |
| 3 | The Lion King | 1994 | 🇺🇸 | Roger Allers | 9.5 |
| 4 | Joker | 2019 | 🇺🇸 | Todd Philips | 9.5 |
| 5 | The Green Mile | 2000 | 🇺🇸 | Frank Darabont | 9.5 |
| 6 | The Lord of the Rings: The Fellowship of the Ring | 2001 | 🇺🇸 | Peter Jackson | 9.5 |
| 7 | The Lord of the Rings: The Two Towers | 2002 | 🇺🇸 | Peter Jackson | 9.5 |
| 8 | The Lord of the Rings: The Return of the King | 2003 | 🇺🇸 | Peter Jackson | 9.5 |
| 9 | Pulp Fiction | 1994 | 🇺🇸 | Quentin Tarantino | 9.5 |
| 10 | 1917 | 2020 | 🇬🇧 | Sam Mendes | 9.4 |
| 11 | Bohemian Rhapsody | 2018 | 🇺🇸 | Bryan Singer | 9.4 |
| 12 | Dune | 2021 | 🇺🇸 | Denis Villeneuve | 9.4 |
| 13 | Interstellar | 2014 | 🇺🇸 | Christopher Nolan | 9.4 |
| 14 | Spider-Man: Into the Spider-Verse | 2018 | 🇺🇸 | Peter Ramsey | 9.4 |
| 15 | Zack Snyder’s Justice League | 2021 | 🇺🇸 | Zack Snyder | 9.3 |
| 16 | Soul | 2020 | 🇺🇸 | Pete Docter | 9.3 |
| 17 | Kingsman: The Secret Service | 2015 | 🇺🇸 | Matthew Vaughn | 9.3 |
| 18 | Léon: The Professional | 1994 | 🇫🇷 | Luc Besson | 9.3 |
| 19 | Slumdog Millionaire | 2009 | 🇬🇧 | Danny Boyle | 9.3 |
| 20 | Titanic | 1998 | 🇺🇸 | James Cameron | 9.3 |
| 21 | Toy Story 3 | 2010 | 🇺🇸 | Lee Unkrich | 9.2 |
| 22 | How to Train Your Dragon | 2010 | 🇺🇸 | Dean DeBlois | 9.2 |
| 23 | Inception | 2010 | 🇺🇸 | Christopher Nolan | 9.2 |
| 24 | Jurassic Park | 1993 | 🇺🇸 | Steven Spielberg | 9.2 |
| 25 | Guardians of the Galaxy | 2014 | 🇺🇸 | James Gunn | 9.2 |
| 26 | Back to the Future | 1985 | 🇺🇸 | Robert Zemeckis | 9.2 |
| 27 | Star Wars: Episode III – Revenge of the Sith | 2005 | 🇺🇸 | George Lucas | 9.2 |
| 28 | Star Wars : Episode V – The Empire Strikes Back | 1980 | 🇺🇸 | Irvin Kershner | 9.2 |
| 29 | Star Wars: Episode VI – Return of the Jedi | 1983 | 🇺🇸 | Richard Marquand | 9.2 |
| 30 | Hacksaw Ridge | 2016 | 🇺🇸 | Mel Gibson | 9.2 |
| 31 | Coco | 2017 | 🇺🇸 | Lee Unkrich | 9.1 |
| 32 | Green Book | 2018 | 🇺🇸 | Peter Farrelly | 9.1 |
| 33 | Harry Potter and the Deathly Hallows: Part 2 | 2011 | 🇺🇸 | David Yates | 9.1 |
| 34 | Skyfall | 2012 | 🇬🇧 | Sam Mendes | 9.1 |
| 35 | The Curious Case of Benjamin Button | 2009 | 🇺🇸 | David Fincher | 9.1 |
| 36 | Rise of the Planet of the Apes | 2011 | 🇺🇸 | Rupert Wyatt | 9.1 |
| 37 | The Pianist | 2002 | 🇫🇷 | Roman Polanski | 9.1 |
| 38 | The Shawshank Redemption | 1995 | 🇺🇸 | Frank Darabont | 9.1 |
| 39 | Guardians of the Galaxy Vol. 2 | 2017 | 🇺🇸 | James Gunn | 9.1 |
| 40 | Avengers: Infinity War | 2018 | 🇺🇸 | Frères Russo | 9.1 |
| 41 | Spider-Man: No Way Home | 2021 | 🇺🇸 | Jon Watts | 9.1 |
| 42 | Parasite | 2019 | 🇰🇷 | Bong Joon-ho | 9.1 |
| 43 | Rogue One: A Star Wars Story | 2016 | 🇺🇸 | Gareth Edwards | 9.1 |
| 44 | The Incredibles | 2004 | 🇺🇸 | Brad Bird | 9.0 |
| 45 | Incredibles 2 | 2018 | 🇺🇸 | Brad Bird | 9.0 |
| 46 | The Great Gatsby | 2013 | 🇺🇸 | Baz Luhrmann | 9.0 |
| 47 | Casino Royale | 2006 | 🇬🇧 | Martin Campbell | 9.0 |
| 48 | Kick-Ass | 2010 | 🇺🇸 | Matthew Vaughn | 9.0 |
| 49 | Life of Pi | 2012 | 🇺🇸 | Ang Lee | 9.0 |
| 50 | Ford v Ferrari | 2019 | 🇺🇸 | James Mangold | 9.0 |
| 51 | Marvel’s The Avengers | 2012 | 🇺🇸 | Joss Whedon | 9.0 |
| 52 | Avengers: Endgame | 2019 | 🇺🇸 | Frères Russo | 9.0 |
| 53 | Pirates of the Caribbean: The Curse of the Black Pearl | 2003 | 🇺🇸 | Gore Verbinski | 9.0 |
| 54 | Spider-Man | 2002 | 🇺🇸 | Sam Raimi | 9.0 |
| 55 | Your Name. | 2016 | 🇯🇵 | Makoto Shinkai | 9.0 |
| 56 | X-Men: Days of Future Past | 2014 | 🇺🇸 | Bryan Singer | 9.0 |
| 57 | War for the Planet of the Apes | 2017 | 🇺🇸 | Matt Reeves | 8.9 |
| 58 | Limitless | 2011 | 🇺🇸 | Neil Burger | 8.9 |
| 59 | Sherlock Holmes | 2010 | 🇺🇸 | Guy Ritchie | 8.9 |
| 60 | Star Trek Into Darkness | 2013 | 🇺🇸 | J. J. Abrams | 8.9 |
| 61 | Knives Out | 2019 | 🇺🇸 | Rian Johnson | 8.9 |
| 62 | Aladdin | 1992 | 🇺🇸 | John Musker et Ron Clements | 8.9 |
| 63 | Toy Story | 1996 | 🇺🇸 | John Lasseter | 8.9 |
| 64 | WALL‐E | 2008 | 🇺🇸 | Andrew Stanton | 8.9 |
| 65 | The Prestige | 2006 | 🇺🇸 | Christopher Nolan | 8.9 |
| 66 | The Batman | 2022 | 🇺🇸 | Matt Reeves | 8.9 |
| 67 | Charlie and the Chocolate Factory | 2005 | 🇺🇸 | Tim Burton | 8.8 |
| 68 | Monsters, Inc. | 2002 | 🇺🇸 | Pete Docter | 8.8 |
| 69 | Zootopia | 2016 | 🇺🇸 | Byron Howard | 8.8 |
| 70 | How to Train Your Dragon 2 | 2014 | 🇺🇸 | Dean DeBlois | 8.8 |
| 71 | Gladiator | 2000 | 🇺🇸 | Ridley Scott | 8.8 |
| 72 | Harry Potter and the Deathly Hallows: Part 1 | 2010 | 🇺🇸 | David Yates | 8.8 |
| 73 | No Time to Die | 2021 | 🇬🇧 | Cary Joji Fukunaga | 8.8 |
| 74 | Kingsman: The Golden Circle | 2017 | 🇺🇸 | Matthew Vaughn | 8.8 |
| 75 | Spirited Away | 2002 | 🇯🇵 | Hayao Miyazaki | 8.8 |
| 76 | Mad Max: Fury Road | 2015 | 🇦🇺 | George Miller | 8.8 |
| 77 | Pirates of the Caribbean: Dead Man’s Chest | 2006 | 🇺🇸 | Gore Verbinski | 8.8 |
| 78 | Back to the Future Part II | 1989 | 🇺🇸 | Robert Zemeckis | 8.8 |
| 79 | Shrek | 2001 | 🇺🇸 | Andrew Adamson | 8.8 |
| 80 | Spider-Man 2 | 2004 | 🇺🇸 | Sam Raimi | 8.8 |
| 81 | Star Wars: Episode II – Attack of the Clones | 2002 | 🇺🇸 | George Lucas | 8.8 |
| 82 | Star Wars: Episode IV – A New Hope | 1977 | 🇺🇸 | George Lucas | 8.8 |
| 83 | The Grand Budapest Hotel | 2014 | 🇺🇸 | Wes Anderson | 8.8 |
| 84 | Whiplash | 2014 | 🇺🇸 | Damien Chazelle | 8.8 |
| 85 | Ratatouille | 2007 | 🇺🇸 | Brad Bird | 8.7 |
| 86 | Wreck‐It Ralph | 2012 | 🇺🇸 | Rich Moore | 8.7 |
| 87 | How to Train Your Dragon: The Hidden World | 2019 | 🇺🇸 | Dean DeBlois | 8.7 |
| 88 | Dunkirk | 2017 | 🇺🇸 | Christopher Nolan | 8.7 |
| 89 | Gone Girl | 2014 | 🇺🇸 | David Fincher | 8.7 |
| 90 | The Imitation Game | 2014 | 🇺🇸 | Morten Tyldum | 8.7 |
| 91 | Klaus | 2019 | 🇪🇸 | Sergio Pablos | 8.7 |
| 92 | Dawn of the Planet of the Apes | 2014 | 🇺🇸 | Matt Reeves | 8.7 |
| 93 | The Lion King | 2019 | 🇺🇸 | Jon Favreau | 8.7 |
| 94 | Le Visiteur du futur | 2022 | 🇫🇷 | François Descraques | 8.7 |
| 95 | The Matrix | 1999 | 🇺🇸 | Les Wachowski | 8.7 |
| 96 | Pirates of the Caribbean: At World’s End | 2007 | 🇺🇸 | Gore Verbinski | 8.7 |
| 97 | Play | 2020 | 🇫🇷 | Anthony Marciano | 8.7 |
| 98 | Back to the Future Part III | 1990 | 🇺🇸 | Robert Zemeckis | 8.7 |
| 99 | The Shining | 1980 | 🇺🇸 | Stanley Kubrick | 8.7 |
| 100 | Shutter Island | 2010 | 🇺🇸 | Martin Scorsese | 8.7 |
# Number of films per release year, over the span of years present in the
# rated films.
YearMin <- min(CritiqueFilm$Année, na.rm = TRUE)
YearMax <- max(CritiqueFilm$Année, na.rm = TRUE)
# The column name is the one data.frame(YearMin:YearMax) generates.
Year_df <- data.frame(YearMin.YearMax = YearMin:YearMax)

# Count, for every year of Year_df, the films of `film_years` released then.
count_films_per_year <- function(film_years) {
  vapply(
    Year_df$YearMin.YearMax,
    function(year) sum(as.numeric(film_years == year), na.rm = TRUE),
    numeric(1)
  )
}
Year_df$CritiqueFilm <- count_films_per_year(CritiqueFilm$Année)
Year_df$NamesFilm <- count_films_per_year(NamesFilm$Année)
# Computed once, after both count columns are complete (it used to be
# recomputed on every loop iteration).
# NOTE(review): NamesFilm is built with bind_rows(CritiqueFilm, ToSeeFilm),
# so this sum counts rated films twice - confirm that is intended.
Year_df$Total <- Year_df$CritiqueFilm + Year_df$NamesFilm
# Window of release years shown on the graph.
YearMin_graph <- 1998
YearMax_graph <- 2022

# Our grade for every rated film released inside that window.
Year_Grade <- CritiqueFilm %>% select(Année, `Nos notes`)
in_graph_window <- Year_Grade$Année >= YearMin_graph &
  Year_Grade$Année <= YearMax_graph
Year_Grade <- Year_Grade[in_graph_window, ]

# Figures quoted in the text: mean grade of the 2019 films and lowest grade.
grades_2019 <- Year_Grade$`Nos notes`[Year_Grade$Année == 2019]
Year_Grade_2019 <- round(mean(grades_2019, na.rm = TRUE), 1)
Year_Grade_Min <- round(min(Year_Grade$`Nos notes`, na.rm = TRUE), 1)

# 2D histogram (one year wide, a third of a grade high) with a linear trend.
Year_Grade_graph <- ggplot(Year_Grade, aes(Année, `Nos notes`)) +
  geom_bin2d(binwidth = c(1, 1 / 3)) +
  scale_x_continuous(breaks = seq(YearMin_graph, YearMax_graph, 2)) +
  geom_smooth(method = lm, col = white, se = FALSE) +
  scale_fill_gradient(low = purple, high = blue) +
  labs(
    title = "Count of films per Year",
    subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
    y = "Grade",
    x = "Year",
    fill = "Count"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Year_Grade_graph
This graph represents my film consumption since
1998, the year I was born. The lighter the colour, the more
films I have seen with that rating in that period.
Since 2019, when I
created my database, we see a greater diversity of bad and good
films, with a tendency to be average overall. By contrast, the years before the
creation of my file have higher average scores because they correspond
to good films that “must” be seen.
Eventually, the aim will be to
see more films over this period to complete each square from
1 to 5 in score and see a real trend
that I imagine is decreasing.
# Films per decade, split by whether they have been seen.
# The table columns are the two values of Seen (FALSE, then TRUE).
Decades <- as.data.frame.matrix(table(select(NamesFilm, Décénie, Seen)))
colnames(Decades) <- c("To see", "Seen")
Decades$Decades <- as.numeric(rownames(Decades))
Decades$Total <- as.numeric(Decades$`To see` + Decades$Seen)

# Two overlaid areas: the whole list behind, the films already seen in
# front, labelled with the number of films seen per decade.
Decades_graph <- ggplot(Decades) +
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Seen, fill = "Movies seen")) +
  # geom_label() does not support check_overlap (geom_text() does); passing
  # it only produced an "unknown parameters" warning, so it is dropped.
  geom_label(
    aes(x = Decades, y = Seen, label = paste(Seen)),
    fill = purple,
    colour = white
  ) +
  # Colours are bound to the legend entries by name rather than by position.
  scale_fill_manual(
    values = c("Movies seen" = purple, "Movies to see" = blue)
  ) +
  scale_x_continuous(breaks = seq(1930, 2020, 10)) +
  labs(
    title = "Volume of films to be seen and films seen\naccording to recommendations",
    y = "Number of films",
    x = "Decade",
    fill = "Legend"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "bottom",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Decades_graph
This graph represents my film consumption since
1998, the year I was born. The lighter the colour, the more
films I have seen with that rating in that period.
Since 2019, when I
created my database, we see a greater diversity of bad and good
films, with a tendency to be average overall. By contrast, the years before the
creation of my file have higher average scores because they correspond
to good films that “must” be seen.
Eventually, the aim will be to
see more films over this period to complete each square from
1 to 5 in score and see a real trend
that I imagine is decreasing.
# Absolute gap stored in the Différence column (overwrites the signed value).
CritiqueFilm$Différence <- abs(CritiqueFilm$Différence)
# Films with a gap above 1.4 get a label on the plot.
TopDiff <- filter(CritiqueFilm, Différence > 1.4)

# Hexagonal density of our grade against the press grade, with the linear
# trend (pink) and the y = x line (grey).
ggplot(CritiqueFilm, aes(`Note Presse`, `Nos notes`)) +
  geom_hex(binwidth = c(.2, .33), color = purple) +
  geom_smooth(col = pink, se = FALSE, method = "lm") +
  # Slope written out: the diagonal is y = x.
  geom_abline(slope = 1, intercept = 0, color = grey) +
  # The label is taken from the layer's own data; referring to
  # TopDiff$... inside aes() bypasses ggplot2's data masking and triggers
  # a warning.
  geom_label_repel(
    data = TopDiff,
    aes(label = `English Title`),
    vjust = "inward",
    hjust = "inward",
    family = "AvertaPE-Regular",
    size = 8 / .pt
  ) +
  scale_fill_gradient(low = purple, high = blue) +
  xlim(0.8, 5.3) +
  ylim(0.8, 5.3) +
  labs(
    title = "Rating of the film compared to the press ratings",
    subtitle = "Trend of overnotting"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
# One row per dated viewing, with its ISO week, day and month, and a flag
# telling whether it happened within the last 365 days.
Date_Evolution <- CritiqueFilm %>%
  select(Date = `Dernier visionnage`) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  mutate(
    Week = as.numeric(format(Date, format = "%V")),
    Day = as.numeric(format(Date, format = "%d")),
    Month = as.numeric(format(Date, format = "%m")),
    LastYear = Date >= (Sys.time() - (365 * 24 * 60 * 60))
  )

# Shift the week numbers relative to the current ISO week, wrapping values
# above 52 back into 1..52.
# NOTE(review): with this shift the current week maps to 53 and wraps to 1;
# the accompanying text says the current month is on the right - confirm.
current_week <- as.numeric(format(Sys.time(), "%V"))
Date_Evolution$Week <- Date_Evolution$Week + 53 - current_week
wraps_around <- Date_Evolution$Week > 52
Date_Evolution$Week[wraps_around] <- Date_Evolution$Week[wraps_around] - 52
Date_Evolution$Week <- round(Date_Evolution$Week, 0)

# Number of viewings per (shifted week, last-year flag).
Date_Evolution_table <- Date_Evolution %>%
  select(Week, LastYear) %>%
  table() %>%
  as.data.frame()

# Smoothed weekly counts: all viewings not in the last year vs. the last year.
ggplot(Date_Evolution_table, aes(x = Week, y = Freq, group = LastYear, color = LastYear)) +
  geom_smooth(method = lm, formula = y ~ splines::bs(x, 7), se = FALSE) +
  labs(
    title = "Identify a decrease and gaps in my consumption",
    subtitle = "Film consumption over a year by week",
    color = "Timeline",
    x = "Week",
    y = "Count"
  ) +
  # `labels` spelled out in full (the original `label =` relied on partial
  # argument matching).
  scale_color_manual(
    values = c(white, yellow),
    labels = c("Global", "This year")
  ) +
  # The deprecated panel.margin.x / panel.margin.y entries were set to NULL,
  # which has no effect, and are dropped.
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular", colour = white),
    title = element_text(colour = white),
    panel.background = element_rect(fill = purple),
    plot.background = element_rect(fill = purple, color = purple),
    panel.grid.major = element_line(colour = purple),
    panel.grid.minor = element_line(colour = purple),
    panel.border = element_blank(),
    legend.text = element_text(colour = white),
    legend.title = element_text(colour = white),
    legend.position = "right",
    legend.background = element_blank(),
    legend.key = element_blank(),
    axis.text = element_text(colour = white),
    axis.text.x = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = white),
    plot.caption = element_text(size = 10, color = blue)
  )
Film consumption is central to my annual objectives and must stay more or less stable if I am to achieve them. These curves show how this consumption varies week by week over a rolling year, with the current month on the right. Keeping the current year’s curve above the overall curve may be a priority to complete my cinephilia and achieve my goals. This filmography is a race against time and can be optimised by segmenting the films to see. Each must-see film is scored from 0 to 100%, where 100% is the highest level of recommendation. Few are above 90%; those can be considered a priority. The Academy Awards can also be an indicator of “quality” but, more importantly, of visibility: they highlight a variety of films, although this selection is heavily influenced. Despite this segmentation, the list of films to be seen keeps growing as well as shrinking, but with a constant viewing frequency of one film per day, the list should eventually be completed.
# Four headline figures about the to-see list, each with its caption.
# `Dernier visionnage` is converted with as.Date() before being compared with
# a Date, so both sides of the comparison have the same class.
one_month_ago <- Sys.Date() %m-% months(1)
Table_Duration <- data.frame(
  # Mixing counts with the "<n>h" string makes the whole column character,
  # as it was when the cells were filled in one by one.
  Data = c(
    nrow(filter(NamesFilm, Reco > 0.9)),
    nrow(filter(NamesFilm, str_detect(Source, "#Oscar"))),
    paste0(round(sum(NamesFilm$Durée[!NamesFilm$Seen], na.rm = TRUE) / 60, 0), "h"),
    nrow(filter(
      NamesFilm,
      Seen == TRUE,
      as.Date(`Dernier visionnage`) > one_month_ago
    ))
  ),
  Caption = c(
    "movies with a recommendation higher than 90%",
    "Academy Awards nominated films on my must-see list",
    "cumulative duration of the films to be seen",
    "films seen this past month"
  ),
  stringsAsFactors = FALSE
)

# Transposed for display: figures on the first row, captions on the second.
Table_Duration %>%
  t() %>%
  as.data.frame() %>%
  kable(
    escape = FALSE, align = rep("c", 10), col.names = NULL,
    row.names = FALSE, booktabs = TRUE
  ) %>%
  kable_styling(full_width = TRUE) %>%
  column_spec(1:4, width = "30em") %>%
  row_spec(1, bold = TRUE, color = yellow, font_size = 30) %>%
  row_spec(2, bold = TRUE, color = white)
| 13 | 208 | 2139h | 11 |
| movies with a recommendation higher than 90% | Academy Awards nominated films on my must-see list | cumulative duration of the films to be seen | films seen this past month |
# Build the director ranking: one row per director with at least 3 films,
# with average press grade, average personal grade, country flags, the gap
# between the two grades and their sum (used for the final ranking).
DirectorTop <- 40
DirectorHead <- as.data.frame(table(CritiqueFilm$Réalisateur))
colnames(DirectorHead) <- c("Director", "Freq")
DirectorHead$Director <- as.character(DirectorHead$Director)
DirectorHead <- DirectorHead[order(-DirectorHead$Freq), ]
DirectorHead <- DirectorHead[DirectorHead$Freq >= 3, ]
#DirectorHead <- head(DirectorList,DirectorTop)

# Row positions in CritiqueFilm of each director's films. which() drops the
# NA comparisons produced by films without a director.
director_rows <- lapply(
  DirectorHead$Director,
  function(director) which(CritiqueFilm$Réalisateur == director)
)

DirectorHead$Presse <- vapply(
  director_rows,
  function(rows) round(mean(CritiqueFilm$`Note Presse`[rows], na.rm = TRUE), 1),
  numeric(1)
)
DirectorHead$OurGrades <- vapply(
  director_rows,
  function(rows) round(mean(CritiqueFilm$`Nos notes`[rows], na.rm = TRUE), 1),
  numeric(1)
)
# Concatenate every distinct, non-missing country flag. Collapsing the whole
# vector avoids the previous hard limit of 11 flags and the need to strip the
# literal text "NA" out of the pasted result.
DirectorHead$Countries <- vapply(
  director_rows,
  function(rows) {
    flags <- unique(CritiqueFilm$`Emoji Pays`[rows])
    paste(flags[!is.na(flags)], collapse = "")
  },
  character(1)
)

DirectorHead$Diff <- DirectorHead$OurGrades - DirectorHead$Presse
DirectorHead$Total <- DirectorHead$Presse + DirectorHead$OurGrades
DirectorHead <- DirectorHead[order(-DirectorHead$Total), ]

# Rank follows the Total ordering; row names are reset to match.
DirectorHead$Rank <- seq_len(nrow(DirectorHead))
rownames(DirectorHead) <- DirectorHead$Rank
DirectorHead <- select(DirectorHead, Rank, Director, Countries, Freq, OurGrades, Presse, Diff, Total)
# Scrollable HTML table of the 50 best-ranked directors. The Total column is
# colour-tiled over the full ranking before the top 50 rows are kept.
DirectorHead %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(50) %>%
  kable(escape = FALSE, align = c("l", "l", rep("c", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(8, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
| Rank | Director | Countries | Freq | OurGrades | Presse | Diff | Total |
|---|---|---|---|---|---|---|---|
| 1 | Peter Jackson | 🇺🇸 | 4 | 4.8 | 4.3 | 0.5 | 9.1 |
| 2 | Christopher Nolan | 🇺🇸 | 7 | 4.7 | 4.2 | 0.5 | 8.9 |
| 3 | Brad Bird | 🇺🇸 | 3 | 4.7 | 4.2 | 0.5 | 8.9 |
| 4 | George Lucas | 🇺🇸 | 4 | 4.8 | 4.1 | 0.7 | 8.9 |
| 5 | Dean DeBlois | 🇺🇸 | 3 | 4.6 | 4.3 | 0.3 | 8.9 |
| 6 | Matt Reeves | 🇺🇸 | 3 | 4.8 | 4.1 | 0.7 | 8.9 |
| 7 | Matthew Vaughn | 🇺🇸 | 5 | 4.8 | 4.0 | 0.8 | 8.8 |
| 8 | Pete Docter | 🇺🇸 | 4 | 4.5 | 4.3 | 0.2 | 8.8 |
| 9 | Gore Verbinski | 🇺🇸 | 3 | 4.8 | 4.0 | 0.8 | 8.8 |
| 10 | Quentin Tarantino | 🇺🇸 | 3 | 4.7 | 4.1 | 0.6 | 8.8 |
| 11 | Sam Mendes | 🇬🇧 | 3 | 4.8 | 4.0 | 0.8 | 8.8 |
| 12 | Andrew Stanton | 🇺🇸 | 3 | 4.5 | 4.2 | 0.3 | 8.7 |
| 13 | Martin Scorsese | 🇺🇸 | 4 | 4.4 | 4.2 | 0.2 | 8.6 |
| 14 | Sam Raimi | 🇺🇸 | 4 | 4.8 | 3.8 | 1.0 | 8.6 |
| 15 | Guy Ritchie | 🇺🇸 | 5 | 4.5 | 3.9 | 0.6 | 8.4 |
| 16 | Bong Joon-ho | 🇰🇷 | 3 | 4.4 | 4.0 | 0.4 | 8.4 |
| 17 | David Yates | 🇺🇸 | 7 | 4.6 | 3.8 | 0.8 | 8.4 |
| 18 | Frères Russo | 🇺🇸 | 4 | 4.3 | 4.1 | 0.2 | 8.4 |
| 19 | J. J. Abrams | 🇺🇸 | 4 | 4.6 | 3.7 | 0.9 | 8.3 |
| 20 | Bryan Singer | 🇺🇸 | 5 | 4.3 | 3.9 | 0.4 | 8.2 |
| 21 | David Fincher | 🇺🇸 | 4 | 4.2 | 4.0 | 0.2 | 8.2 |
| 22 | John Lasseter | 🇺🇸 | 4 | 4.3 | 3.9 | 0.4 | 8.2 |
| 23 | Wes Anderson | 🇺🇸 | 4 | 4.3 | 3.9 | 0.4 | 8.2 |
| 24 | Danny Boyle | 🇬🇧🇺🇸 | 3 | 4.4 | 3.8 | 0.6 | 8.2 |
| 25 | Steven Spielberg | 🇺🇸 | 6 | 4.2 | 3.9 | 0.3 | 8.1 |
| 26 | Clint Eastwood | 🇺🇸 | 3 | 4.0 | 4.1 | -0.1 | 8.1 |
| 27 | Jon Watts | 🇺🇸 | 3 | 4.2 | 3.9 | 0.3 | 8.1 |
| 28 | James Mangold | 🇺🇸 | 5 | 4.1 | 3.9 | 0.2 | 8.0 |
| 29 | Rian Johnson | 🇺🇸 | 3 | 4.4 | 3.6 | 0.8 | 8.0 |
| 30 | Zack Snyder | 🇺🇸 | 7 | 4.3 | 3.6 | 0.7 | 7.9 |
| 31 | Ridley Scott | 🇺🇸 | 6 | 3.9 | 4.0 | -0.1 | 7.9 |
| 32 | George Miller | 🇺🇸🇦🇺 | 3 | 4.4 | 3.5 | 0.9 | 7.9 |
| 33 | Robert Zemeckis | 🇺🇸 | 8 | 4.1 | 3.8 | 0.3 | 7.9 |
| 34 | James Gunn | 🇺🇸 | 4 | 4.1 | 3.8 | 0.3 | 7.9 |
| 35 | Gary Trousdale et Kirk Wise | 🇺🇸 | 3 | 4.1 | 3.8 | 0.3 | 7.9 |
| 36 | Hayao Miyazaki | 🇯🇵 | 5 | 3.6 | 4.2 | -0.6 | 7.8 |
| 37 | Francis Lawrence | 🇺🇸 | 4 | 4.2 | 3.6 | 0.6 | 7.8 |
| 38 | Ang Lee | 🇺🇸 | 3 | 4.2 | 3.6 | 0.6 | 7.8 |
| 39 | Chad Stahelski | 🇺🇸 | 3 | 4.2 | 3.6 | 0.6 | 7.8 |
| 40 | Marc Webb | 🇺🇸 | 3 | 4.1 | 3.7 | 0.4 | 7.8 |
| 41 | Tim Burton | 🇺🇸 | 9 | 3.9 | 3.8 | 0.1 | 7.7 |
| 42 | Andrew Adamson | 🇺🇸 | 4 | 4.1 | 3.6 | 0.5 | 7.7 |
| 43 | Pierre Coffin | 🇺🇸 | 3 | 3.9 | 3.8 | 0.1 | 7.7 |
| 44 | John Musker et Ron Clements | 🇺🇸 | 5 | 3.7 | 3.9 | -0.2 | 7.6 |
| 45 | Tom McGrath | 🇺🇸 | 5 | 4.1 | 3.5 | 0.6 | 7.6 |
| 46 | Carlos Saldanha | 🇺🇸 | 4 | 3.8 | 3.8 | 0.0 | 7.6 |
| 47 | Philippe Lacheau | 🇫🇷 | 4 | 3.8 | 3.8 | 0.0 | 7.6 |
| 48 | Jennifer Yuh Nelson | 🇺🇸 | 3 | 3.8 | 3.7 | 0.1 | 7.5 |
| 49 | Taika Waititi | 🇺🇸 | 3 | 4.0 | 3.5 | 0.5 | 7.5 |
| 50 | Jon Favreau | 🇺🇸 | 4 | 3.9 | 3.5 | 0.4 | 7.4 |
# Directors plotted by average personal grade (x) and number of films (y).
# Points and labels are coloured by whether the grade is above the mean.
# NOTE(review): the subtitle is the literal string "Sub", whereas the genre
# chart passes a variable named Sub - confirm which one is intended.
Director_graph <- ggplot(DirectorHead, aes(x = OurGrades, y = Freq)) +
  # Reference line at the mean grade of the listed directors
  geom_vline(
    xintercept = mean(DirectorHead$OurGrades, na.rm = TRUE),
    col = grey
  ) +
  geom_point(
    aes(size = Freq, color = OurGrades > mean(OurGrades, na.rm = TRUE))
  ) +
  # Quadratic B-spline trend fitted by linear regression
  geom_smooth(
    col = pink,
    method = "lm",
    formula = y ~ splines::bs(x, 2),
    se = FALSE
  ) +
  geom_text(
    aes(
      label = paste0(Director, ": ", OurGrades),
      color = OurGrades > mean(OurGrades, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  # Numeric value of the mean, printed next to the reference line
  geom_text(
    aes(
      x = mean(OurGrades, na.rm = TRUE) - 0.1,
      label = round(mean(OurGrades, na.rm = TRUE), 2),
      y = max(Freq) + 1
    ),
    color = grey,
    angle = 0,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  scale_size_continuous(range = c(0.5, 5)) +
  scale_color_manual(values = mypal(2)) +
  # Extra room on the right so the labels of the best grades stay visible
  xlim(min(DirectorHead$OurGrades), 5.5) +
  labs(
    title = "Directors by volume and score",
    subtitle = "Sub",
    y = "Frequency",
    x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Director_graph
# Per-director summary of the cumulative grade: best, worst, rounded mean and
# film count. Only the 15 directors with the most films are kept, then sorted
# by mean. The result stays grouped by Réalisateur.
AListed <- CritiqueFilm %>%
  select(Réalisateur, Notes = `Notes cummulées`) %>%
  group_by(Réalisateur) %>%
  mutate(
    Max = max(Notes),
    Min = min(Notes),
    Mean = round(mean(Notes), 1),
    Count = n()
  ) %>%
  # Every film row of a director now carries the same summary values
  arrange(desc(Count)) %>%
  select(-Notes) %>%
  # Collapse the repeated rows to a single row per director
  unique() %>%
  head(15) %>%
  arrange(desc(Mean))
# Range chart: for each director a grey segment joins the worst and best
# grade, coloured dots mark both ends, and a large yellow dot at the mean
# carries the number of films.
ggplot(AListed, aes(y = Réalisateur)) +
  geom_segment(
    aes(x = Min, xend = Max, y = Réalisateur, yend = Réalisateur),
    color = "grey",
    size = .5
  ) +
  geom_point(aes(x = Max, color = "Max"), size = 2) +
  geom_point(aes(x = Min, color = "Min"), size = 2) +
  geom_point(aes(x = Mean), color = yellow, size = 7) +
  geom_text(aes(x = Mean, label = Count), col = purple, family = "AvertaPE-Black") +
  # Reverse so the first row of AListed is drawn at the top
  scale_y_discrete(limits = rev(AListed$Réalisateur)) +
  # Breaks are given explicitly so each label is tied to its own key. Without
  # them the keys are ordered alphabetically ("Max", "Min") and the labels
  # were attached the wrong way round; the unused third label is dropped.
  scale_color_manual(
    values = c("Min" = pink, "Max" = blue),
    breaks = c("Min", "Max"),
    labels = c("Minimum", "Maximum")
  ) +
  labs(
    title = "Director ratings with range between worst and best film",
    #subtitle = "Test",
    x = "Grades",
    y = NULL,
    color = "Grades",
    caption = "Source : Critique Films"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
# Genre frequencies: the Genre field is split on " / " and each term counted.
Genre <- as.data.frame(table(Genre = unlist(strsplit(CritiqueFilm$Genre, " / "))))
Genre <- Genre[order(-Genre$Freq), ]
# Compress the counts (1 + count / 10, rounded) before using them as weights
Genre$Freq <- round(1 + Genre$Freq / 10, 0)

# Stack the first and second genre of every film with its personal grade
Genre1 <- select(CritiqueFilm, Genre = `Genre 1`, Note = `Nos notes`)
Genre2 <- select(CritiqueFilm, Genre = `Genre 2`, Note = `Nos notes`)
Genre_merge <- rbind(Genre1, Genre2)
Genre_merge

# Average grade of each genre, rounded to one decimal
Genre$Notes <- vapply(
  as.character(Genre$Genre),
  function(genre_name) {
    round(mean(Genre_merge$Note[Genre_merge$Genre == genre_name], na.rm = TRUE), 1)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Word cloud of genres sized by the compressed frequency
wordcloud(
  words = Genre$Genre,
  freq = Genre$Freq,
  min.freq = 1,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0,
  colors = rev(mycols3),
  family = "AvertaPE-Black"
)
# Keep as many leading rows as there are genres with a compressed frequency
# above 1 (Genre is expected to be sorted by decreasing Freq at this point).
Genre <- as.data.frame(head(Genre, sum(as.numeric(Genre$Freq > 1))))

# Genres plotted by average grade (x) and compressed frequency (y)
Genre_graph <- ggplot(Genre, aes(x = Notes, y = Freq)) +
  # Reference line at the mean genre grade
  geom_vline(
    xintercept = mean(Genre$Notes, na.rm = TRUE),
    col = grey
  ) +
  # The logical expression gives one colour below and one above the mean
  geom_point(aes(color = Notes > mean(Notes, na.rm = TRUE))) +
  geom_smooth(
    col = pink,
    method = "lm",
    formula = y ~ splines::bs(x, 2),
    se = FALSE
  ) +
  geom_text(
    aes(
      label = paste0(Genre, ": ", Notes),
      color = Notes > mean(Notes, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  # Numeric value of the mean, printed next to the reference line
  geom_text(
    aes(
      x = mean(Notes, na.rm = TRUE) - 0.05,
      label = round(mean(Notes, na.rm = TRUE), 2),
      y = max(Freq) + 1
    ),
    color = grey,
    angle = 0,
    vjust = 0,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  scale_color_manual(values = mypal(2)) +
  # Extra room on the right for the labels of the best-rated genres
  xlim(min(Genre$Notes), max(Genre$Notes) + 0.3) +
  labs(
    title = "Genre by volume and score",
    subtitle = Sub,
    y = "Frequency",
    x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Genre_graph
# Radar chart comparing the genre mix of grade "A" films with grade "E" films.

# Count films per (Grade, Genre), using both the first and second genre.
Genre_radar <- rbind(
  CritiqueFilm %>% select(Grade, Genre = `Genre 1`),
  CritiqueFilm %>% select(Grade, Genre = `Genre 2`)
) %>%
  filter(!is.na(Genre), Grade %in% c("A", "E")) %>%
  group_by(Grade, Genre) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(
    # Cap the counts at 75, then rescale them to the [0, 1] range
    Count = pmin(Count, 75),
    Count = BBmisc::normalize(Count, method = "range")
  )

# The ten genres with the largest summed (normalised) count
Top_Genre <- Genre_radar %>%
  group_by(Genre) %>%
  summarise(Sum = sum(Count)) %>%
  arrange(desc(Sum)) %>%
  head(10) %>%
  pull(Genre)

Genre_radar <- Genre_radar %>%
  filter(Genre %in% Top_Genre)

# One row per grade, one column per genre
Skill_radar <- xtabs(formula = Count ~ Grade + Genre, data = Genre_radar) %>%
  as.data.frame.matrix()
Skill_radar <- Skill_radar %>%
  mutate(Grade = row.names(Skill_radar)) %>%
  `rownames<-`(seq_len(nrow(Skill_radar))) %>%
  # all_of() makes it explicit that Top_Genre is an external character vector;
  # passing it bare to select() is deprecated tidyselect usage.
  select(Grade, all_of(as.character(Top_Genre)))

library(ggradar)
Skill_radar_graph <- Skill_radar %>%
  ggradar(
    grid.label.size = 4, # Affects the grid annotations (0%, 50%, etc.)
    axis.label.size = 3.2,
    group.point.size = 3, # Simply the size of the point
    group.colours = c(blue, pink)
  ) +
  labs(
    title = paste("Genre comparison between A-Listed and E-Listed"),
    caption = "Source : Critique Films"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    legend.position = c(-0.1, 0.2),
    legend.justification = "left",
    legend.text = element_text(size = 10),
    legend.key = element_rect(fill = NA, color = NA),
    legend.background = element_blank(),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Skill_radar_graph
# Actors plotted by average grade (x) and number of films (y), most frequent
# actors first in the data.
Acteur <- Acteur[order(-Acteur$Freq), ]
Acteur_graph <- ggplot(Acteur, aes(x = Notes, y = Freq)) +
  # Reference line at the mean actor grade
  geom_vline(
    xintercept = mean(Acteur$Notes, na.rm = TRUE),
    col = grey
  ) +
  # Numeric value of the mean, printed next to the reference line
  geom_text(
    aes(
      x = mean(Notes, na.rm = TRUE) - 0.1,
      label = round(mean(Notes, na.rm = TRUE), 2),
      y = max(Freq) + 1
    ),
    color = grey,
    angle = 0,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  # Cubic B-spline trend fitted by linear regression
  geom_smooth(
    col = pink,
    method = "lm",
    formula = y ~ splines::bs(x, 3),
    se = FALSE
  ) +
  geom_point(
    aes(size = Freq, color = Notes > mean(Notes, na.rm = TRUE))
  ) +
  geom_text(
    aes(
      label = paste(Acteur, Notes),
      color = Notes > mean(Notes, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.5,
    family = "AvertaPE-Regular",
    size = 9 / .pt,
    check_overlap = TRUE
  ) +
  scale_size_continuous(range = c(0.5, 5)) +
  scale_color_manual(values = mycols2) +
  # Extra room on the right for the labels of the best-rated actors
  xlim(min(Acteur$Notes), 6) +
  labs(
    title = "Actor with the best average according to their frequency",
    subtitle = "",
    y = "Frequency",
    x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Acteur_graph
# Actor table: rank follows the current row order of Acteur, and each actor
# gets the flags of the countries found for them in Acteur_merge.
Acteur$Rank <- seq_len(nrow(Acteur))
rownames(Acteur) <- Acteur$Rank

# Concatenate every distinct, non-missing flag of an actor. Collapsing the
# whole vector removes the previous cap of 11 flags and the need to strip the
# literal text "NA" from the pasted result.
Acteur$Countries <- vapply(
  as.character(Acteur$Acteur),
  function(actor_name) {
    flags <- Acteur_merge$`Emoji Pays`[which(Acteur_merge$Acteur == actor_name)]
    paste(unique(flags[!is.na(flags)]), collapse = "")
  },
  character(1),
  USE.NAMES = FALSE
)

Acteur <- select(Acteur, Rank, Acteur, Countries, Freq, Notes, Presse, Total)

# Scrollable HTML table of the first 50 actors. The Total column is
# colour-tiled over all actors before the first 50 rows are kept.
Acteur %>%
  mutate(Total = color_tile(blue, purple)(Total)) %>%
  head(50) %>%
  kable(escape = FALSE, align = c("l", "l", "c", "c", "c", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(2, bold = TRUE) %>%
  column_spec(7, bold = TRUE, color = white) %>%
  scroll_box(width = "100%", height = "400px")
| Rank | Acteur | Countries | Freq | Notes | Presse | Total |
|---|---|---|---|---|---|---|
| 1 | Hugh Jackman | 🇺🇸 | 14 | 3.8 | 3.5 | 7.3 |
| 2 | Robert Downey Jr. | 🇺🇸 | 13 | 4.3 | 3.9 | 8.2 |
| 3 | Chris Evans | 🇺🇸🇰🇷 | 12 | 4.1 | 3.6 | 7.7 |
| 4 | Brad Pitt | 🇺🇸🇫🇷 | 12 | 3.9 | 3.5 | 7.4 |
| 5 | Johnny Depp | 🇺🇸 | 11 | 4.2 | 3.8 | 8.0 |
| 6 | Tom Hanks | 🇺🇸 | 11 | 4.1 | 3.9 | 8.0 |
| 7 | Daniel Radcliffe | 🇳🇿🇺🇸 | 11 | 4.1 | 3.8 | 7.9 |
| 8 | Robert De Niro | 🇺🇸 | 11 | 3.7 | 3.6 | 7.3 |
| 9 | Ryan Reynolds | 🇺🇸 | 11 | 3.6 | 3.5 | 7.1 |
| 10 | Seth Rogen | 🇺🇸🇬🇧 | 11 | 3.3 | 3.0 | 6.3 |
| 11 | Emma Watson | 🇺🇸 | 10 | 4.2 | 3.9 | 8.1 |
| 12 | Chris Hemsworth | 🇺🇸 | 10 | 4.2 | 3.6 | 7.8 |
| 13 | Ben Stiller | 🇺🇸 | 10 | 3.8 | 3.3 | 7.1 |
| 14 | Leonardo DiCaprio | 🇺🇸 | 9 | 4.6 | 4.1 | 8.7 |
| 15 | Rupert Grint | 🇺🇸 | 9 | 4.3 | 3.9 | 8.2 |
| 16 | Scarlett Johansson | 🇺🇸 | 9 | 3.7 | 3.7 | 7.4 |
| 17 | Zac Efron | 🇺🇸 | 9 | 3.3 | 3.1 | 6.4 |
| 18 | Natalie Portman | 🇺🇸🇫🇷 | 8 | 4.4 | 3.8 | 8.2 |
| 19 | Daniel Craig | 🇺🇸🇬🇧 | 8 | 4.5 | 3.6 | 8.1 |
| 20 | Chris Pratt | 🇺🇸 | 8 | 4.3 | 3.7 | 8.0 |
| 21 | Angelina Jolie | 🇺🇸 | 8 | 4.1 | 3.8 | 7.9 |
| 22 | Robin Williams | 🇺🇸 | 8 | 4.1 | 3.6 | 7.7 |
| 23 | Jake Gyllenhaal | 🇺🇸🇨🇦 | 8 | 4.0 | 3.5 | 7.5 |
| 24 | Bradley Cooper | 🇺🇸 | 8 | 3.7 | 3.6 | 7.3 |
| 25 | Marion Cotillard | 🇫🇷🇺🇸 | 8 | 3.8 | 3.5 | 7.3 |
| 26 | Joseph Gordon-Levitt | 🇺🇸 | 8 | 3.7 | 3.4 | 7.1 |
| 27 | Will Smith | 🇺🇸 | 8 | 3.6 | 3.3 | 6.9 |
| 28 | Michaël Youn | 🇫🇷 | 8 | 2.9 | 2.2 | 5.1 |
| 29 | Ramzy Bedia | 🇫🇷 | 8 | 2.2 | 1.9 | 4.1 |
| 30 | Jennifer Lawrence | 🇺🇸 | 7 | 4.0 | 3.7 | 7.7 |
| 31 | Emma Stone | 🇺🇸 | 7 | 3.7 | 3.8 | 7.5 |
| 32 | Jack Black | 🇺🇸 | 7 | 3.9 | 3.6 | 7.5 |
| 33 | Anne Hathaway | 🇺🇸 | 7 | 3.7 | 3.6 | 7.3 |
| 34 | Robert Pattinson | 🇺🇸 | 7 | 3.9 | 3.4 | 7.3 |
| 35 | John Leguizamo | 🇺🇸 | 7 | 3.3 | 3.6 | 6.9 |
| 36 | Owen Wilson | 🇺🇸 | 7 | 3.6 | 3.3 | 6.9 |
| 37 | Kevin Hart | 🇺🇸 | 7 | 3.3 | 3.4 | 6.7 |
| 38 | Dwayne Johnson | 🇺🇸 | 7 | 3.2 | 3.4 | 6.6 |
| 39 | Jean Dujardin | 🇫🇷 | 7 | 3.5 | 3.0 | 6.5 |
| 40 | Kristen Stewart | 🇨🇱🇺🇸 | 7 | 3.3 | 2.9 | 6.2 |
| 41 | Anna Faris | 🇺🇸 | 7 | 2.9 | 2.4 | 5.3 |
| 42 | Ewan McGregor | 🇺🇸 | 6 | 4.5 | 3.8 | 8.3 |
| 43 | Kirsten Dunst | 🇺🇸 | 6 | 4.4 | 3.8 | 8.2 |
| 44 | Samuel L. Jackson | 🇺🇸 | 6 | 4.3 | 3.9 | 8.2 |
| 45 | Christian Bale | 🇺🇸 | 6 | 4.1 | 4.0 | 8.1 |
| 46 | Woody Harrelson | 🇺🇸 | 6 | 4.2 | 3.9 | 8.1 |
| 47 | Bruce Willis | 🇺🇸 | 6 | 4.2 | 3.8 | 8.0 |
| 48 | Chris Pine | 🇬🇧🇺🇸 | 6 | 4.1 | 3.7 | 7.8 |
| 49 | Tom Holland | 🇺🇸 | 6 | 4.0 | 3.8 | 7.8 |
| 50 | Adam Driver | 🇺🇸 | 6 | 4.0 | 3.7 | 7.7 |
# Grade distribution of the ten first actors of Acteur: one point per
# (actor, grade) pair sized by the number of films, over a bar showing the
# mean of the plotted grade levels.
AListed_Actor <- as.data.frame(table(select(Acteur_merge, Acteur, Grade)))
top_actors <- as.character(head(Acteur, 10)$Acteur)
AListed_Actor <- AListed_Actor[as.character(AListed_Actor$Acteur) %in% top_actors, ]
AListed_Actor <- AListed_Actor[AListed_Actor$Freq > 0, ]

# Total number of films per actor, repeated on each of the actor's rows
AListed_Actor$Total <- ave(
  AListed_Actor$Freq,
  as.character(AListed_Actor$Acteur),
  FUN = function(freq) sum(freq, na.rm = TRUE)
)
AListed_Actor <- AListed_Actor[order(-AListed_Actor$Total), ]

# Turn the grade factor into a score: level index i becomes |i - 6|.
# NOTE(review): this yields 5..1 only if Grade has exactly five levels sorted
# best-first (e.g. "A".."E") - confirm against the data.
AListed_Actor$Grade <- abs(as.numeric(AListed_Actor$Grade) - 6)

AListed_graph <- ggplot(AListed_Actor, aes(x = Acteur, y = Grade)) +
  geom_bar(stat = "summary", fun = "mean", fill = grey, alpha = 0.2) +
  # Freq is mapped inside aes() so that scale_size_continuous() and the
  # legend actually apply; a size given outside aes() bypasses the scale.
  geom_point(aes(size = Freq), color = blue) +
  scale_size_continuous(range = c(0.5, 10)) +
  # Actors are ordered by decreasing number of films
  scale_x_discrete(
    guide = guide_axis(n.dodge = 1),
    limits = as.character(unique(AListed_Actor$Acteur))
  ) +
  labs(
    title = "Actor with the best average according to their frequency",
    subtitle = "",
    y = "Grade",
    x = "Actor"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  ) +
  coord_flip()
AListed_graph
# Co-appearance counts between the 50 most frequent actors.
# Cell [partner, lead] counts the films where `lead` is billed first and
# `partner` second or third, plus those where `lead` is billed second and
# `partner` third.
Acteur <- Acteur %>% arrange(desc(Freq))
Actor_list <- as.character(head(Acteur$Acteur, 50))
Actor_matrix <- matrix(
  nrow = length(Actor_list),
  ncol = length(Actor_list),
  dimnames = list(Actor_list, Actor_list)
)

for (lead in colnames(Actor_matrix)) {
  # Everyone billed after `lead` in the films where `lead` appears first or
  # second; NA entries are ignored by the counting below.
  partners <- c(
    CritiqueFilm$`Acteur 2`[lead == CritiqueFilm$`Acteur 1`],
    CritiqueFilm$`Acteur 3`[lead == CritiqueFilm$`Acteur 1`],
    CritiqueFilm$`Acteur 3`[lead == CritiqueFilm$`Acteur 2`]
  )
  Actor_matrix[, lead] <- vapply(
    rownames(Actor_matrix),
    function(partner) sum(partners == partner, na.rm = TRUE),
    integer(1)
  )
}

# Data frame with the row actor in a leading `from` column
Actor_matrix <- as.data.frame(Actor_matrix)
Actor_matrix$from <- as.character(rownames(Actor_matrix))
Actor_matrix <- Actor_matrix[, c("from", Actor_list)]
# Transform the adjacency matrix in a long format: one row per (from, to)
# pair with its co-appearance count. Column names are used as they are:
# as.data.frame() on a matrix does not mangle them, and rewriting "." as " "
# would corrupt names such as "Robert Downey Jr." and create vertices that no
# longer match the `from` column.
connect <- Actor_matrix %>%
  as.data.frame() %>%
  gather(key = "to", value = "value", -1) %>%
  na.omit()
connect <- connect[connect$value > 0, ]

# Number of connection per person (appearances as either end of an edge)
coauth <- tibble(name = c(as.character(connect$from), as.character(connect$to))) %>%
  group_by(name) %>%
  summarize(n = n())
#dim(coauth)

# Create a graph object with igraph
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)

# Find community (cluster_walktrap is the current name of walktrap.community)
com <- cluster_walktrap(mygraph)
#max(com$membership)

# Reorder dataset and make the graph: vertices are sorted by community so
# that members of a community sit next to each other.
coauth <- coauth %>%
  mutate(grp = com$membership) %>%
  arrange(grp) %>%
  mutate(name = factor(name, name))

# One colour per community, interpolated along the report palette
colfunc <- colorRampPalette(c(purple, pink, blue, yellow))
scale_col <- colfunc(max(coauth$grp))

# keep only this people in edges
connect <- connect %>%
  filter(from %in% coauth$name) %>%
  filter(to %in% coauth$name)

# Create a graph object with igraph, now with the reordered vertices
mygraph <- graph_from_data_frame(connect, vertices = coauth, directed = FALSE)
# Make the graph: arc diagram with the actors on a line, arcs for shared
# films, node size for the number of connections and colour for community.
ggraph(mygraph, layout = "linear") +
  geom_edge_arc(edge_colour = grey, fold = TRUE) +
  geom_node_point(
    aes(size = n, color = as.factor(grp), fill = grp)
  ) +
  geom_node_text(
    aes(label = name),
    angle = 65,
    hjust = 1,
    nudge_y = -0.5,
    size = 3
  ) +
  scale_color_manual(values = scale_col) +
  scale_size_continuous(range = c(0.5, 5)) +
  # Leave room under the axis for the rotated names
  expand_limits(x = c(-1.2, 1.2), y = c(-5, 0)) +
  theme(
    text = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  )
# Emit a <style> block that gives leaflet containers a white background.
cat(paste(
  c(
    "",
    "<style>",
    ".leaflet-container {",
    "background: #FFF;",
    "}",
    "</style>",
    ""
  ),
  collapse = "\n"
))
# World map enriched, per sovereign country, with the number of films in
# CritiqueFilm and the title and cumulative grade of its best-ranked film.
map <- ne_countries()
# Export the country names used for matching
write.csv(map$sovereignt, "map_Countries.csv")

film_country <- CritiqueFilm$`Pays d'origine`
film_rank <- CritiqueFilm$`Nombre Classement`

# Number of films per country (0 is turned into NA further down)
map$freq <- vapply(
  map$sovereignt,
  function(country) as.numeric(sum(film_country == country, na.rm = TRUE)),
  numeric(1),
  USE.NAMES = FALSE
)

# Row in CritiqueFilm of each country's best film, i.e. the smallest
# `Nombre Classement` among that country's own films. Countries without any
# ranked film get NA instead of a zero-length or NA-led lookup, and a film
# from another country can no longer be picked up through an equal rank.
best_film_row <- vapply(
  map$sovereignt,
  function(country) {
    rows <- which(film_country == country & !is.na(film_rank))
    if (length(rows) == 0L) {
      NA_integer_
    } else {
      rows[which.min(film_rank[rows])]
    }
  },
  integer(1),
  USE.NAMES = FALSE
)
map$best_movie <- CritiqueFilm$`Titre du film`[best_film_row]
map$best_movie_rate <- CritiqueFilm$`Notes cummulées`[best_film_row]

# Countries without films are flagged as missing rather than zero
map$freq[map$freq == 0] <- NA
# Bucket the film count of each country into a grade from 0 (no film) to 5.
map$Grade <- case_when(
  is.na(map$freq) ~ 0,
  map$freq == 1 ~ 1,
  map$freq < 5 ~ 2,
  map$freq < 100 ~ 3,
  map$freq < 500 ~ 4,
  TRUE ~ 5
)
# Text label of each grade, looked up by position (grade 0 is element 1)
map$Label <- c("0", "1", "<5", "<100", "<500", ">500")[map$Grade + 1]

# Colour palette over the grades, one bin boundary per integer grade.
# NOTE(review): with boundaries 0..max the last bin is closed on both sides,
# so the two highest grades appear to share a colour - confirm this is wanted.
pal <- colorBin(
  palette = mycols5,
  domain = map$Grade,
  bins = seq(0, max(map$Grade, na.rm = TRUE), by = 1)
)

# HTML hover label of each country
label_html <- paste0(
  "<strong> Country: </strong> ", map$sovereignt, "<br/> ",
  "<strong> Number of movies seen : </strong> ", round(map$freq, 0), "<br/> ",
  "<strong> Best movie for this country : </strong> ", map$best_movie, " : ", map$best_movie_rate, "/10", "<br/> "
)
map$labels <- lapply(label_html, htmltools::HTML)
# Interactive choropleth: countries filled by grade, with a hover label and a
# pink outline on the hovered country.
LeafMap <- leaflet(map) %>%
  setMapWidgetStyle(list(background = "white")) %>%
  setView(lng = 0, lat = 30, zoom = 1.3) %>%
  addPolygons(
    fillColor = ~ pal(Grade),
    color = purple,
    weight = 1,
    opacity = 1,
    fillOpacity = 1,
    label = ~labels,
    # Argument spelled out in full instead of relying on partial matching of
    # `highlight`; `fill` is passed as the logical flag it represents.
    highlightOptions = highlightOptions(
      color = pink,
      bringToFront = TRUE,
      fill = TRUE,
      fillOpacity = 1
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~Grade, #c("0","1","<5","<100","<500",">500"),
    opacity = 1,
    title = "Freq"
  )
LeafMap
# Grade distribution of the ten sagas with the most films: one point per
# (saga, grade) pair sized by the number of films, over a bar showing the
# mean of the plotted grade levels.
AListed_Saga <- as.data.frame(table(select(CritiqueFilm, Saga, Grade)))

# Sagas by decreasing number of films; the value "Saga" itself is excluded
Table_saga <- as.data.frame(table(CritiqueFilm$Saga))
Table_saga <- Table_saga[order(-Table_saga$Freq), ]
Table_saga <- Table_saga[Table_saga$Var1 != "Saga", ]

AListed_Saga <- AListed_Saga[AListed_Saga$Saga %in% as.character(head(Table_saga, 10)$Var1), ]
AListed_Saga <- AListed_Saga[AListed_Saga$Freq > 0, ]

# Total number of films per saga, repeated on each of the saga's rows
AListed_Saga$Total <- ave(
  AListed_Saga$Freq,
  as.character(AListed_Saga$Saga),
  FUN = function(freq) sum(freq, na.rm = TRUE)
)
AListed_Saga <- AListed_Saga[order(-AListed_Saga$Total), ]

# Turn the grade factor into a score: level index i becomes |i - 6|.
# NOTE(review): this yields 5..1 only if Grade has exactly five levels sorted
# best-first (e.g. "A".."E") - confirm against the data.
AListed_Saga$Grade <- abs(as.numeric(AListed_Saga$Grade) - 6)

AListed_graph <- ggplot(AListed_Saga, aes(x = Saga, y = Grade)) +
  geom_bar(stat = "summary", fun = "mean", fill = grey, alpha = 0.2) +
  # Freq is mapped inside aes() so the size scale and its legend apply; it is
  # a count, hence a continuous size scale rather than a discrete one.
  geom_point(aes(size = Freq), color = blue) +
  scale_size_continuous(range = c(0.5, 20)) +
  # Sagas are ordered by decreasing number of films
  scale_x_discrete(
    guide = guide_axis(n.dodge = 2),
    limits = as.character(unique(AListed_Saga$Saga))
  ) +
  ylim(0, 5) +
  labs(
    title = "Saga with the best average according to their frequency",
    subtitle = "",
    y = "Grade",
    x = "Saga"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
AListed_graph
# Saga summary table: number of films released between DecadeMin_graph and
# DecadeMax_graph, plus average grades, average difference and total duration.
# NOTE(review): Freq is counted on the year-filtered films, whereas the four
# statistics below are computed over every film of the saga in CritiqueFilm -
# confirm that this mix is intended.
DecadeMin_graph <- 1930
DecadeMax_graph <- 2020

Decade_Grade_Saga <- CritiqueFilm %>% select(Saga, Année, `Nos notes`)
Decade_Grade_Saga <- Decade_Grade_Saga[
  Decade_Grade_Saga$Année >= DecadeMin_graph & Decade_Grade_Saga$Année <= DecadeMax_graph,
]

# Films per saga, most frequent first; the value "Saga" itself is excluded
Decade_Grade_Saga_table <- as.data.frame(table(Decade_Grade_Saga$Saga))
colnames(Decade_Grade_Saga_table)[1] <- "Saga"
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[order(-Decade_Grade_Saga_table$Freq), ]
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[Decade_Grade_Saga_table$Saga != "Saga", ]

# Rounded mean of a CritiqueFilm column for each saga of the table
saga_labels <- as.character(Decade_Grade_Saga_table$Saga)
saga_mean <- function(values) {
  vapply(
    saga_labels,
    function(saga) round(mean(values[saga == CritiqueFilm$Saga], na.rm = TRUE), 1),
    numeric(1),
    USE.NAMES = FALSE
  )
}
Decade_Grade_Saga_table$Grade <- saga_mean(CritiqueFilm$`Nos notes`)
Decade_Grade_Saga_table$Presse <- saga_mean(CritiqueFilm$`Note Presse`)
Decade_Grade_Saga_table$Diff <- saga_mean(CritiqueFilm$Différence)
# Summed Durée divided by 60 (presumably minutes -> hours)
Decade_Grade_Saga_table$Duration <- vapply(
  saga_labels,
  function(saga) round(sum(CritiqueFilm$Durée[saga == CritiqueFilm$Saga], na.rm = TRUE) / 60, 1),
  numeric(1),
  USE.NAMES = FALSE
)

rownames(Decade_Grade_Saga_table) <- seq_len(nrow(Decade_Grade_Saga_table))

knitr::kable(Decade_Grade_Saga_table) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  scroll_box(width = "100%", height = "370px")
| Saga | Freq | Grade | Presse | Diff | Duration |
|---|---|---|---|---|---|
| Disney | 70 | 3.9 | 3.8 | 0.4 | 123.1 |
| Marvel | 46 | 4.1 | 3.6 | 0.5 | 110.2 |
| DreamWorks | 31 | 3.8 | 3.6 | 0.5 | 47.3 |
| DC | 15 | 3.8 | 3.5 | 0.6 | 42.9 |
| Ghibli | 12 | 3.5 | 3.9 | 0.5 | 22.2 |
| Star Wars | 12 | 4.5 | 3.8 | 0.7 | 26.5 |
| Harry Potter | 10 | 4.3 | 3.8 | 0.5 | 28.0 |
| American Pie | 8 | 2.8 | 2.5 | 0.4 | 12.9 |
| James Bond | 6 | 4.3 | 3.6 | 0.8 | 15.8 |
| Jurassic Park | 5 | 4.3 | 3.3 | 0.9 | 12.4 |
| L’Âge de glace | 5 | 3.2 | 3.4 | 0.3 | 8.8 |
| Pirates des Caraïbes | 5 | 4.6 | 3.8 | 0.8 | 12.1 |
| Pokémon | 5 | 3.3 | 3.0 | 0.5 | 8.4 |
| Saw | 5 | 3.1 | 2.8 | 0.5 | 8.2 |
| Twilight | 5 | 3.6 | 3.1 | 0.5 | 10.2 |
| Astérix | 4 | 3.9 | 3.3 | 0.6 | 6.5 |
| Hunger Games | 4 | 4.2 | 3.6 | 0.5 | 9.1 |
| La Planète des Singes | 4 | 4.7 | 4.0 | 0.7 | 8.1 |
| La Terre du Milieu | 4 | 4.9 | 4.3 | 0.6 | 14.0 |
| Scary Movie | 4 | 2.7 | 2.2 | 0.5 | 5.7 |
| Transformers | 4 | 3.4 | 3.1 | 0.5 | 9.4 |
| American Nightmare | 3 | 3.4 | 3.1 | 0.4 | 4.9 |
| Ducobu | 3 | 2.6 | 2.0 | 0.6 | 4.7 |
| Fast & Furious | 3 | 2.4 | 2.8 | 0.5 | 5.8 |
| Hellboy | 3 | 3.3 | 2.8 | 0.6 | 6.0 |
| Hôtel Transylvanie | 3 | 3.6 | 3.6 | 0.1 | 6.2 |
| John Wick | 3 | 4.2 | 3.6 | 0.5 | 5.9 |
| Jumanji | 3 | 3.9 | 3.5 | 0.3 | 5.8 |
| Klapisch | 3 | 2.9 | 3.7 | 0.8 | 6.1 |
| La Nuit au Musée | 3 | 3.6 | 3.0 | 0.6 | 5.2 |
| Les Schtroumpfs | 3 | 3.3 | 2.9 | 0.4 | 5.0 |
| Lucky Luke | 3 | 2.8 | 1.6 | 1.2 | 4.6 |
| Moi, Moche et Méchant | 3 | 3.9 | 3.8 | 0.2 | 4.7 |
| Mon beau-père et moi | 3 | 3.2 | 3.1 | 0.2 | 5.3 |
| Narnia | 3 | 3.5 | 3.2 | 0.4 | 6.8 |
| Retour vers le futur | 3 | 4.6 | 4.3 | 0.4 | 5.7 |
| Sherlock Holmes | 3 | 4.3 | 3.9 | 0.7 | 6.3 |
| Star Trek | 3 | 4.7 | 3.8 | 0.9 | 6.4 |
| Tarantino | 3 | 4.7 | 4.1 | 0.5 | 7.8 |
| Very Bad Trip | 3 | 4.0 | 3.4 | 0.6 | 5.0 |
| 300 | 2 | 4.0 | 3.6 | 0.4 | 3.6 |
| Babysitting | 2 | 3.9 | 3.8 | 0.1 | 3.0 |
| Borat | 2 | 2.8 | 2.7 | 0.1 | 2.9 |
| Comme des bêtes | 2 | 3.0 | 3.6 | 0.6 | 2.9 |
| Comment tuer son boss? | 2 | 3.2 | 2.9 | 0.4 | 3.4 |
| Destination finale | 2 | 3.0 | 2.7 | 0.3 | 2.9 |
| Dr. Seuss | 2 | 3.4 | 3.3 | 0.7 | 3.2 |
| Happy Feet | 2 | 4.3 | 3.2 | 1.2 | 3.5 |
| Jump Street | 2 | 3.5 | 3.5 | 0.0 | 3.7 |
| Kingsman | 2 | 4.8 | 4.0 | 0.8 | 6.7 |
| Le Choc des Titans | 2 | 4.2 | 2.4 | 1.8 | 3.4 |
| Les Nouvelles Aventures | 2 | 1.3 | 1.5 | 0.2 | 3.4 |
| Matrix | 2 | 4.2 | 4.0 | 0.3 | 4.6 |
| Nos pires voisins | 2 | 3.4 | 2.6 | 0.8 | 3.1 |
| OSS 117 | 2 | 3.8 | 3.3 | 0.5 | 5.2 |
| Papa ou Maman | 2 | 3.6 | 3.5 | 0.2 | 3.0 |
| Percy Jackson | 2 | 3.3 | 2.7 | 0.6 | 3.8 |
| Red | 2 | 3.9 | 3.5 | 0.4 | 3.8 |
| Rio | 2 | 3.9 | 3.8 | 0.4 | 3.2 |
| Sister Act | 2 | 3.2 | 3.5 | 0.2 | 3.5 |
| Ted | 2 | 3.8 | 3.3 | 0.5 | 3.7 |
| Zombieland | 2 | 3.9 | 3.9 | 0.2 | 3.1 |
| Agatha Christie | 1 | 4.2 | 3.3 | 0.9 | 4.1 |
| Alien | 1 | 4.0 | 3.3 | 0.7 | 2.1 |
| Asimov | 1 | 3.7 | 3.7 | 0.0 | 1.7 |
| Assassin’s Creed | 1 | 3.6 | 2.9 | 0.7 | 1.9 |
| Blade Runner | 1 | 2.8 | 4.2 | 1.4 | 2.0 |
| Breaking Bad | 1 | 4.2 | 3.7 | 0.5 | 2.0 |
| Dernier train pour Busan | 1 | 3.9 | 4.1 | 0.2 | 2.0 |
| Dragon Quest | 1 | 4.1 | 3.8 | 0.3 | 1.7 |
| Ghost in the Shell | 1 | 3.2 | 3.3 | 0.1 | 1.8 |
| Happy Birthdead | 1 | 3.7 | 3.3 | 0.4 | 1.6 |
| Hawking | 1 | 4.2 | 4.3 | 0.1 | 2.0 |
| His Dark Materials | 1 | 4.0 | 2.8 | 1.2 | 1.9 |
| Jackass | 1 | 1.0 | 2.2 | 1.2 | 1.4 |
| Kick-Ass | 1 | 5.0 | 4.0 | 1.0 | 2.0 |
| La Tour Montparnasse | 1 | 3.4 | 2.4 | 1.0 | 1.5 |
| Le Labyrinthe | 1 | 3.8 | 3.8 | 0.0 | 1.9 |
| Le Petit Nicolas | 1 | 2.4 | 2.4 | 0.0 | 1.6 |
| Lego | 1 | 3.8 | 3.0 | 0.8 | 1.2 |
| LEGO | 1 | 3.6 | 4.0 | 0.4 | 1.7 |
| Les Visiteurs | 1 | 1.0 | 1.5 | 0.5 | 1.8 |
| Limitless | 1 | 5.0 | 3.9 | 1.1 | 1.8 |
| Mad Max | 1 | 4.6 | 4.2 | 0.4 | 2.0 |
| Matt Groening | 1 | 4.4 | 3.8 | 0.6 | 1.4 |
| Men in Black | 1 | 3.4 | 2.5 | 0.9 | 1.9 |
| MonsterVerse | 1 | 4.2 | 3.8 | 0.4 | 3.1 |
| Mythologie | 1 | 4.2 | 3.6 | 0.6 | 2.7 |
| Ocean | 1 | 3.0 | 3.0 | 0.0 | 1.8 |
| Orelsan | 1 | 3.2 | 3.4 | 0.2 | 1.5 |
| Prince of Persia | 1 | 4.4 | 3.3 | 1.1 | 1.9 |
| Sans un bruit | 1 | 4.3 | 3.8 | 0.6 | 3.1 |
| Seuls | 1 | 2.4 | 2.3 | 0.1 | 1.6 |
| Sonic | 1 | 3.4 | 3.3 | 0.1 | 1.6 |
| Tintin | 1 | 4.0 | 3.6 | 0.4 | 1.8 |
| Titeuf | 1 | 3.8 | 3.1 | 0.7 | 2.7 |
| Transperceneige | 1 | 4.6 | 3.5 | 1.1 | 2.1 |
# Heat map of the ten first sagas of the table: films counted per saga and
# per 10-year bin.
Decade_Grade_Saga_table_top <- head(Decade_Grade_Saga_table, 10)
Decade_Grade_Saga <- Decade_Grade_Saga[
  as.character(Decade_Grade_Saga$Saga) %in% as.character(Decade_Grade_Saga_table_top$Saga),
]

Year_Grade_graph <- ggplot(Decade_Grade_Saga, aes(Année, Saga)) +
  # Bins are 10 years wide and one saga high
  geom_bin2d(binwidth = c(10, 1)) +
  scale_x_continuous(breaks = seq(DecadeMin_graph, DecadeMax_graph, 10)) +
  scale_fill_gradient(low = purple, high = blue) +
  # Single labs() call carrying the labels that were effectively displayed
  labs(
    title = "Count of films per Saga and Decades",
    subtitle = paste0("from ", DecadeMin_graph, " to ", DecadeMax_graph),
    y = "Saga",
    x = "Decade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Year_Grade_graph
# The 15 first sagas of the table plotted by average grade (x) and total
# duration (y). Points and labels are coloured by whether the grade is above
# the mean of these sagas.
Decade_Grade_Saga_table <- head(Decade_Grade_Saga_table, 15)
Saga_graph <- ggplot(Decade_Grade_Saga_table, aes(x = Grade, y = Duration)) +
  # Reference line at the mean saga grade
  geom_vline(
    xintercept = mean(Decade_Grade_Saga_table$Grade, na.rm = TRUE),
    col = grey
  ) +
  # Numeric value of the mean, printed next to the reference line
  geom_text(
    aes(
      x = mean(Grade, na.rm = TRUE) - 0.1,
      label = round(mean(Grade, na.rm = TRUE), 2),
      y = max(Duration) + 1
    ),
    colour = grey,
    angle = 0,
    vjust = 1.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt
  ) +
  geom_point(aes(colour = Grade > mean(Grade, na.rm = TRUE))) +
  # Extra room on the right for the labels of the best-rated sagas
  xlim(min(Decade_Grade_Saga_table$Grade), 5.2) +
  geom_text(
    aes(
      label = paste(Saga, Grade),
      colour = Grade > mean(Grade, na.rm = TRUE)
    ),
    hjust = -0.1,
    vjust = -0.2,
    family = "AvertaPE-Regular",
    size = 9 / .pt,
    check_overlap = TRUE
  ) +
  scale_color_manual(values = mycols2) +
  # The title previously repeated the actor chart's title although this
  # chart shows sagas by grade and duration.
  labs(
    title = "Sagas by average grade and cumulative duration",
    subtitle = "",
    y = "Duration",
    x = "Grade"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "none",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Saga_graph
# Average personal grade per film duration.
# Durations are first rounded to the nearest even number so that neighbouring
# durations share a bar. Note that this overwrites CritiqueFilm$Durée.
CritiqueFilm$Durée <- round(CritiqueFilm$Durée / 2, 0) * 2

# Blank out unusually long films: everything above the upper whisker of the
# box plot. The previous threshold, min(boxplot.stats()$out), is the smallest
# outlier of either side, so a single very short film would have blanked
# almost every duration, and the smallest long outlier was always kept.
duration_upper_whisker <- boxplot.stats(CritiqueFilm$Durée)$stats[5]
CritiqueFilm$Durée[which(CritiqueFilm$Durée > duration_upper_whisker)] <- NA

Duration_graph <- ggplot(CritiqueFilm, aes(x = Durée, y = `Nos notes`)) +
  # One bar per duration, its height being the mean grade
  geom_bar(stat = "summary", fun = "mean", fill = purple) +
  geom_smooth(color = pink) +
  labs(
    title = "Average rating per film duration",
    y = "Grade",
    x = "Duration (min)"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Duration_graph
# Horizontal bars of the Diff column per saga, sagas sorted by increasing
# Diff, with a vertical reference line at zero.
Decade_Grade_Saga_table <- Decade_Grade_Saga_table[order(Decade_Grade_Saga_table$Diff), ]
AListed_Saga_Table_graph <- ggplot(Decade_Grade_Saga_table, aes(x = Diff, y = Saga)) +
  geom_bar(stat = "summary", fun = "sum", fill = purple) +
  geom_vline(xintercept = 0, col = blue) +
  # Axis order follows the sorted table
  scale_y_discrete(
    guide = guide_axis(n.dodge = 1),
    limits = as.character(unique(Decade_Grade_Saga_table$Saga))
  ) +
  labs(
    title = "Difference between our Grades and Press' Grades",
    y = "Saga",
    x = "Difference"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
AListed_Saga_Table_graph
# Number of films per day of the week of their last viewing.
Weekdays_order <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

# Day names are derived from the numeric weekday (0 = Sunday) instead of
# weekdays(), whose output follows the session locale and would not match the
# English axis limits on a non-English system.
viewing_dates <- as.Date(CritiqueFilm$`Dernier visionnage`)
Weekdays_table <- data.frame(
  Days = Weekdays_order[as.POSIXlt(viewing_dates)$wday + 1],
  stringsAsFactors = FALSE
)
# Films without a viewing date are left out
Weekdays_table <- Weekdays_table %>% filter(!is.na(Days))

Weekdays_graph <- ggplot(Weekdays_table, aes(x = Days)) +
  geom_bar(stat = "count", fill = purple) +
  scale_x_discrete(limits = Weekdays_order) +
  labs(
    title = "Count of films per day of the week",
    y = "Count",
    x = "Days"
  ) +
  theme(
    text = element_text(size = 12, family = "AvertaPE-Regular"),
    panel.background = element_blank(),
    legend.position = "right",
    legend.background = element_blank(),
    axis.line = element_line(colour = purple),
    plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
    plot.caption = element_text(size = 10, color = blue)
  )
Weekdays_graph
# Share of films per calendar month (lollipop chart).
# NOTE(review): the month is taken from `Date de sortie` (release date) while
# the title says "seen per month" - confirm which one is intended.
Month_table <- as.numeric(format(as.Date(CritiqueFilm$`Date de sortie`, format = "%Y-%m-%d"), "%m"))
# table() turns the month numbers into a factor column (also named
# Month_table) plus a Freq column, which is then converted to percentages.
Month_table <- as.data.frame(table(Month_table))
Month_table$Freq <- round(Month_table$Freq * 100 / sum(Month_table$Freq), 1)

Month_graph <- ggplot(Month_table, aes(x = Month_table, y = Freq)) +
  # Reference line: share each month would have under a uniform distribution.
  geom_hline(yintercept = 100 / 12, col = grey) +
  geom_bar(stat = "identity", fill = grey, width = 0.01) +
  geom_point(size = 2, color = blue) +
  geom_text(aes(label = paste0(Freq, "%")),
            size = 3,
            hjust = 0.5,
            vjust = -1,
            family = "AvertaPE-Regular",
            check_overlap = TRUE) +
  # The x values are factor levels ("1".."12"), so the limits are given as
  # character; `labels` is spelled out instead of relying on partial matching.
  scale_x_discrete(limits = as.character(1:12), labels = month.abb[1:12]) +
  labs(title = "Percentage of films seen per month",
       y = "Percent", x = "Month") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Month_graph
# library(ggraph)
# library(igraph)
# library(tidyverse)
# library(viridis)
#
#
# Distribution <- select(CritiqueFilm,`Maison mère`,`Maison de distribution`)
# Distribution2 <- Distribution %>% count(`Maison de distribution`)
#
# for (d in 1:nrow(Distribution2)){
# Distribution2$`Maison mère`[d] <- Distribution$`Maison mère`[Distribution2$`Maison de distribution`[d]==Distribution$`Maison de distribution`]
# }
# Distribution2
#
# Distribution2 <- Distribution2[Distribution2$n>=3,]
# Distribution2 <- Distribution2[Distribution2$`Maison mère`!="Autre",]
#
# Distribution2$`Maison mère` <- str_replace_all(Distribution2$`Maison mère`," ","")
# Distribution2$`Maison de distribution` <- str_replace_all(Distribution2$`Maison de distribution`," ","")
#
# Distribution2$name <- paste0("Distribution.",Distribution2$`Maison mère`,".",Distribution2$`Maison de distribution`)
# Distribution2$from <- paste0("Distribution.",Distribution2$`Maison mère`)
#
# vertices <- Distribution2 %>% select(name,n,`Maison de distribution`)
# colnames(vertices) <- c("name","size","shortName")
#
# edges <- Distribution2 %>% select(from,name)
# colnames(edges) <- c("from","to")
#
# d1 <- data.frame(from="origin", to=paste("group", seq(1,10), sep=""))
# d2 <- data.frame(from=rep(d1$to, each=10), to=paste("subgroup", seq(1,100), sep="_"))
# hierarchy <- rbind(d1, d2)
#
# vertices <- data.frame(name = unique(c(as.character(hierarchy$from), as.character(hierarchy$to))) )
#
# vertices$id <- NA
# myleaves <- which(is.na( match(vertices$name, edges$from) ))
# nleaves <- length(myleaves)
# vertices$id[ myleaves ] <- seq(1:nleaves)
# vertices$angle <- 90 - 360 * vertices$id / nleaves
# vertices$hjust <- ifelse( vertices$angle < -90, 1, 0)
# vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
#
# mygraph <- graph_from_data_frame( hierarchy, vertices=vertices )
#
# ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
# geom_node_point(aes(filter = leaf, x = x*1.05, y=y*1.05)) +
# geom_conn_bundle(data = get_con(from = from, to = to), alpha=0.2, colour="skyblue", width=0.9) +
# geom_node_text(aes(x = x*1.1, y=y*1.1, filter = leaf, label=name, angle = angle, hjust=hjust), size=1.5, alpha=1) +
# theme_void() +
# theme(
# legend.position="none",
# plot.margin=unit(c(0,0,0,0),"cm"),
# ) +
# expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))
# Most recently watched films: table of the `Top` latest viewings.
Top <- 10
Lastfilms <- CritiqueFilm[!is.na(CritiqueFilm$`Dernier visionnage`), ] %>%
  select(Title = `English Title`,
         Date = `Date de sortie`,
         Country = `Pays d'origine`,
         Director = Réalisateur,
         Grade = `Notes cummulées`,
         `Last Visio` = `Dernier visionnage`)
Lastfilms$`Last Visio` <- as.Date(as.POSIXct(Lastfilms$`Last Visio`))
# The format is passed by name: positionally it would be taken as the `tz`
# argument when the column is a date-time rather than text.
Lastfilms$Date <- format(as.Date(Lastfilms$Date, format = "%m/%d/%y"), "%b %Y")
# Newest viewing first; ties keep their original order.
Lastfilms <- Lastfilms %>%
  arrange(desc(`Last Visio`)) %>%
  head(Top)
Lastfilms$Country[Lastfilms$Country %in% "United States of America"] <- "USA"
Lastfilms %>%
  mutate(Grade = color_tile(blue, purple)(Grade)) %>%
  kable(escape = FALSE, align = c("l", "c", "c", "l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(5, bold = TRUE, color = white)
| Title | Date | Country | Director | Grade | Last Visio |
|---|---|---|---|---|---|
| X-Men: Days of Future Past | May 2014 | USA | Bryan Singer | 9.0 | 2022-10-21 |
| Mulan 2 : La Mission de l’Empereur | Feb 2005 | USA | Lynne Southerland & Darrell Rooney | 5.1 | 2022-10-20 |
| Go West: A Lucky Luke Adventure | Dec 2007 | France | Olivier Jean-Marie | 6.2 | 2022-10-14 |
| The Wolverine | Jul 2013 | USA | James Mangold | 7.1 | 2022-10-09 |
| Chicken Little | Dec 2005 | USA | Mark Dindal | 5.0 | 2022-10-09 |
| X-Men: First Class | May 2011 | USA | Matthew Vaughn | 8.7 | 2022-10-08 |
| Beauty and the Beast | Oct 1992 | USA | Gary Trousdale et Kirk Wise | 8.1 | 2022-10-08 |
| X-Men Origins: Wolverine | Apr 2009 | USA | Gavin Hood | 6.4 | 2022-10-07 |
| Super | Apr 2011 | USA | James Gunn | 7.0 | 2022-10-04 |
| Le crocodile du Botswanga | Feb 2014 | France | Lionel Steketee et Fabrice Éboué | 5.2 | 2022-10-04 |
# Stacked area chart of film counts per decade, split by the DA flag.
DA <- select(CritiqueFilm, `Date de sortie`, DA)
DA$`Date de sortie` <- as.numeric(format(as.Date(DA$`Date de sortie`, format = "%Y-%m-%d"), "%Y"))
# NOTE(review): round() maps a year to the *nearest* multiple of ten (e.g.
# 1996 -> 2000), not to the decade it belongs to - confirm this is intended.
DA$`Date de sortie` <- round(DA$`Date de sortie` / 10, 0) * 10
# Cross-tabulate decade x DA flag; the two flag columns are renamed and the
# first one is turned into the overall total.
DA <- as.data.frame.matrix(table(DA))
colnames(DA) <- c("Total", "Anime")
DA$Decades <- rownames(DA)
DA$Total <- DA$Total + DA$Anime

DA_graph <- ggplot(DA) +
  geom_area(aes(x = Decades, y = Total, fill = "Movies to see")) +
  geom_area(aes(x = Decades, y = Anime, fill = "Movies seen")) +
  # geom_label() has no `check_overlap` parameter (it only produced an
  # "Ignoring unknown parameters" warning), so it is not passed here.
  geom_label(aes(x = Decades, y = Anime, label = paste(Anime)),
             fill = purple,
             colour = white,
             hjust = 0.5,
             vjust = -2,
             family = "AvertaPE-Regular") +
  scale_fill_manual(values = c(purple, blue)) +
  labs(y = "Number of films", x = "Decade") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        legend.title = element_blank(),
        legend.position = "top",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = blue))
DA_graph
library(rvest)
library(stringr)
# Refresh the Blu-ray price list scraped from amazon.fr and cache it in
# ToBuy.Rda. The scrape only runs when the cache is missing, empty, or its
# newest entry is more than seven days old.
tobuy_cache <- "ToBuy.Rda"
cache_is_stale <- TRUE
if (file.exists(tobuy_cache)) {
  load(tobuy_cache)
  cache_is_stale <- nrow(ToBuy) == 0 ||
    !isTRUE(max(ToBuy$Date) >= Sys.Date() - 7)
}

if (cache_is_stale) {
  # Films flagged "A acheter"; %in% treats a missing flag as "not flagged".
  wanted <- CritiqueFilm$`Titre du film`[CritiqueFilm$`A acheter` %in% "A acheter"]
  wanted <- wanted[!is.na(wanted)]

  # Titles are split on " - " and the second part is used as the search term.
  # Extracting it per title copes with titles that split into a different
  # number of parts; titles without a second part are dropped.
  search_terms <- vapply(strsplit(wanted, " - "), function(parts) parts[2], character(1))
  search_terms <- search_terms[!is.na(search_terms)]
  ToBuy <- data.frame(ToBuy = search_terms, stringsAsFactors = FALSE)

  # reserved = TRUE also escapes characters such as "&" or "?" that would
  # otherwise be read as URL syntax instead of being part of the title.
  encoded_terms <- vapply(ToBuy$ToBuy, URLencode, character(1),
                          reserved = TRUE, USE.NAMES = FALSE)
  ToBuy$Link <- paste0("https://www.amazon.fr/s?k=", encoded_terms, "+blu-ray")

  # Pre-allocate the scraped columns; a failed request leaves NA for that row
  # instead of aborting the whole refresh.
  ToBuy$Price <- rep(NA_character_, nrow(ToBuy))
  ToBuy$Name <- rep(NA_character_, nrow(ToBuy))
  for (b in seq_along(ToBuy$Link)) {
    url <- ToBuy$Link[b]
    website <- tryCatch(
      read_html(url),
      error = function(e) {
        warning("Could not read ", url, ": ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
    if (is.null(website)) {
      next
    }
    # First search result only: its price and its product name.
    ToBuy$Price[b] <- html_text(html_nodes(website, ".a-price-whole"))[1]
    ToBuy$Name[b] <- html_text(html_nodes(website, ".s-line-clamp-4"))[1]
  }

  # Prices use a decimal comma on the page.
  ToBuy$Price <- as.numeric(str_replace(ToBuy$Price, ",", "."))
  ToBuy$Date <- Sys.Date()
  save(ToBuy, file = tobuy_cache)
}
# Distribution of Blu-ray prices (2-unit price buckets) for the films to buy.
load("ToBuy.Rda")
ToBuy <- ToBuy[order(ToBuy$Price), ]
# Keep only prices in [4, 30); rows outside that range or without a price are
# dropped.
price_in_range <- !is.na(ToBuy$Price) & ToBuy$Price >= 4 & ToBuy$Price < 30
ToBuy <- ToBuy[price_in_range, ]
ToBuy$Price_rounded <- round(ToBuy$Price / 2) * 2

Bluray <- as.data.frame(table(ToBuy$Price_rounded))
# table() stores the bucket as a factor; convert back to a number for the axis.
Bluray$Var1 <- as.numeric(as.character(Bluray$Var1))

# The y axis used to be fixed at 15, which silently removed any bucket with a
# higher count. Keep 15 as a minimum and leave headroom for the labels.
bluray_y_max <- max(15, Bluray$Freq * 1.15)

Bluray_graph <- ggplot(Bluray, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = purple, width = 0.01) +
  geom_point(size = 3, color = blue) +
  geom_text(aes(label = paste0(Freq)),
            size = 3,
            hjust = 0.5,
            vjust = -2,
            family = "AvertaPE-Regular",
            check_overlap = TRUE) +
  ylim(0, bluray_y_max) +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "none",
        axis.line = element_line(colour = blue)) +
  labs(title = "Number of BluRay to buy",
       y = "Count", x = "Price")
Bluray_graph
# Table of the ten cheapest Blu-rays (ToBuy is already sorted by price).
# head() returns fewer rows when fewer than ten are available, instead of
# padding the table with NA rows; columns are selected by name, not position.
ToBuyTop <- as.data.frame(head(ToBuy[, c("ToBuy", "Price")], 10))
rownames(ToBuyTop) <- seq_len(nrow(ToBuyTop))
ToBuyTop %>%
  mutate(Price = color_tile(blue, purple)(Price)) %>%
  kable(escape = FALSE, align = c("l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(2, bold = TRUE, color = white)
| ToBuy | Price |
|---|---|
| Dragons 2 | 6.00 |
| Kung Fu Panda 3 | 6.00 |
| L’Âge de Glace 1 | 6.19 |
| Patients | 6.20 |
| Les Nouveaux Héros | 7.37 |
| Sully | 7.70 |
| The King’s Man : Première Mission | 7.81 |
| Les Douze Travaux d’Astérix | 7.99 |
| Imitation Game | 8.24 |
| Get Out | 8.38 |
# Grades over release date, with one smoothed trend per highlighted saga and
# a "Trend" line for all other films.
Year_graph_DB <- select(CritiqueFilm, `Date de sortie`, `Notes cummulées`, Grade, Saga)
# which() drops films without a release date instead of turning them into
# all-NA rows.
# NOTE(review): the comparison assumes `Date de sortie` is a Date - confirm.
released_after_1985 <- which(Year_graph_DB$`Date de sortie` > as.Date("1985-01-01"))
Year_graph_DB <- Year_graph_DB[released_after_1985, ]

# Only the first five sagas of Decade_Grade_Saga keep their own name.
highlighted_sagas <- Decade_Grade_Saga$Saga %>% unique() %>% head(5)
Year_graph_DB$Saga[!Year_graph_DB$Saga %in% highlighted_sagas] <- "Trend"

# Grades are halved to fit the 0-5 axis. The former labs() call that was
# immediately overridden and the size scale (no size aesthetic is mapped) are
# removed; the title now names what is plotted (grades, not counts).
Year_graph <- ggplot(Year_graph_DB, aes(x = `Date de sortie`, y = `Notes cummulées` / 2)) +
  geom_point(colour = "#F2F2F2") +
  ylim(0, 5) +
  geom_smooth(aes(group = Saga, col = Saga), method = lm,
              formula = y ~ splines::bs(x, 4), se = FALSE) +
  scale_color_manual(values = mypal(6)) +
  labs(title = "Evolution of grades per Year",
       subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
       y = "Grade", x = "Year") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Year_graph
# Embed the author's picture in the rendered document.
author_photo <- "/Users/theotimebourgeois/Desktop/Graphisme/Théotime/PhotoCV.svg"
knitr::include_graphics(author_photo)
## Warning in knitr::include_graphics("/Users/theotimebourgeois/Desktop/Graphisme/
## Théotime/PhotoCV.svg"): It is highly recommended to use relative paths for
## images. You had absolute paths: "/Users/theotimebourgeois/Desktop/Graphisme/
## Théotime/PhotoCV.svg"
Analysis conducted by Théotime Bourgeois
Master of Science - Data Science & Organizational Behavior
by Burgundy School of Business
# Grades of the films that have an entry in the Oscar column, over time, with
# one smoothed trend for rows whose entry contains "Oscar" and one for the rest.
# filter() drops rows with a missing year; base logical indexing would have
# kept them as all-NA rows (and an extra NA colour group).
Oscar <- NamesFilm %>%
  filter(!is.na(Oscar), Année >= YearMin_graph)
Oscar$OscarTF <- Oscar$Oscar %>% str_detect("Oscar")

# The labs() call that was immediately overridden by the next one is removed.
ggplot(Oscar, aes(x = Année, y = `Notes cummulées`)) +
  # Point size = number of films sharing the same year and grade.
  geom_count(colour = purple) +
  geom_point(data = Oscar %>% filter(OscarTF == TRUE), colour = blue) +
  scale_size("Count", range = c(1, 6)) +
  geom_smooth(aes(group = OscarTF, col = OscarTF), method = lm,
              formula = y ~ splines::bs(x, 2), se = FALSE) +
  scale_color_manual("Winner", values = c(purple, blue)) +
  labs(title = "Evolution of the scores of the films presented at the Oscars",
       subtitle = paste0("from ", YearMin_graph, " to ", YearMax_graph),
       y = "Grade", x = "Year") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
# Circle-packing widget of distributors grouped by parent company.
# Step 1: number of films per distributor.
distributor_counts <- as.data.frame(table(NamesFilm$`Maison de distribution`))
colnames(distributor_counts) <- c("Maison de distribution", "Freq")

# Step 2: attach each distributor's parent company (joined on the shared
# `Maison de distribution` column).
distributor_parents <- unique(select(NamesFilm, `Maison de distribution`, `Maison mère`))
Distri_Circle <- merge(distributor_counts, distributor_parents)

# Step 3: shape the table as root / group / subgroup / value and keep only
# distributors with more than 10 films whose parent is neither "France" nor
# "Autre".
Distri_Circle <- Distri_Circle %>%
  mutate(root = "root") %>%
  filter(!is.na(`Maison mère`)) %>%
  select(root,
         group = `Maison mère`,
         subgroup = `Maison de distribution`,
         value = Freq) %>%
  filter(group != "France",
         group != "Autre",
         value > 10)

# Leaf labels show the film count; pathString encodes the hierarchy levels.
Distri_Circle$subgroup <- paste0(Distri_Circle$subgroup, " (", Distri_Circle$value, ")")
Distri_Circle$pathString <- paste("world", Distri_Circle$group, Distri_Circle$subgroup, sep = "/")
population <- as.Node(Distri_Circle)

# Build the widget with a custom colour range and save it as a standalone page.
p <- circlepackeR(population,
                  size = "value",
                  color_min = "hsl(240, 31%, 25%)",
                  color_max = "hsl(0, 0%, 0%)")
saveWidget(p, file = "circles.html")
# Grades over release date for the first `Top` actors of the Acteur table,
# with one smoothed trend per actor.
Top <- 5
Top_Acteur <- Acteur$Acteur %>%
  head(Top) %>%
  as.character()
# The column is referenced directly inside filter() rather than via `df$col`.
Acteur_merge_Top <- Acteur_merge %>% filter(Acteur %in% Top_Acteur)

# Earliest release year among the selected films, used in the subtitle.
first_year <- Acteur_merge_Top$`Date de sortie` %>%
  format("%Y") %>%
  as.numeric() %>%
  min()

# The title now describes the plotted quantity (grades, not film counts).
Acteur_Top_graph <-
  ggplot(Acteur_merge_Top, aes(x = `Date de sortie`, y = `Nos notes`, col = Acteur)) +
  geom_point() +
  geom_smooth(aes(group = Acteur),
              method = lm, formula = y ~ splines::bs(x, 3), se = FALSE) +
  scale_color_manual(values = mypal(Top)) +
  labs(title = "Evolution of grades for the top actors",
       subtitle = paste0("from ", first_year, " to ", YearMax_graph),
       y = "Grade", x = "Year") +
  theme(text = element_text(size = 12, family = "AvertaPE-Regular"),
        panel.background = element_blank(),
        legend.position = "right",
        legend.background = element_blank(),
        axis.line = element_line(colour = purple),
        plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
        plot.caption = element_text(size = 10, color = blue))
Acteur_Top_graph
# Actors with the most not-yet-seen films (among actors with at least one film
# already seen), drawn as overlapping "seen" / "total" lollipops.

# Stack the three actor columns into one long Acteur / Seen table.
actor_columns <- c("Acteur 1", "Acteur 2", "Acteur 3")
actor_seen_long <- lapply(actor_columns, function(actor_col) {
  NamesFilm %>%
    select(all_of(actor_col), Seen) %>%
    `colnames<-`(c("Acteur", "Seen"))
})

# Cross-tabulate actor x Seen, rank by unseen count (ties: fewer seen first),
# drop actors never seen, and keep the first ten.
Acteur_Proj <- do.call(rbind, actor_seen_long) %>%
  table() %>%
  as.data.frame.matrix() %>%
  arrange(desc(`FALSE`), `TRUE`) %>%
  filter(`TRUE` != 0) %>%
  head(10)

# Actor names live in the row names; move them into a column, compute totals
# and order the factor so that the plot is sorted by total.
Acteur_Proj <- Acteur_Proj %>%
  mutate(Acteur = rownames(Acteur_Proj)) %>%
  select(Acteur, Seen = `TRUE`, NotSeen = `FALSE`) %>%
  mutate(Total = Seen + NotSeen) %>%
  arrange(desc(Total), desc(Seen)) %>%
  mutate(Acteur = fct_reorder(Acteur, Total))
rownames(Acteur_Proj) <- 1:nrow(Acteur_Proj)

actor_proj_theme <- theme(
  text = element_text(size = 12, family = "AvertaPE-Regular"),
  panel.background = element_blank(),
  legend.position = "right",
  legend.background = element_blank(),
  axis.line = element_line(colour = purple),
  plot.title = element_text(size = 16, family = "AvertaPE-Black", color = purple),
  plot.caption = element_text(size = 10, color = blue)
)

# The wide "total" bar is drawn first, the thinner "seen" bar on top of it.
ggplot(Acteur_Proj, aes(y = Acteur)) +
  geom_segment(aes(x = 0, xend = Total, yend = Acteur, colour = "Not Seen"), size = 9) +
  geom_segment(aes(x = 0, xend = Seen, yend = Acteur, colour = "Seen"), size = 6) +
  geom_point(aes(x = Seen, colour = "Seen"), size = 5) +
  geom_point(aes(x = Total, colour = "Not Seen"), size = 8) +
  scale_color_manual(values = c(purple, pink)) +
  geom_text(aes(x = Seen, label = Seen), colour = white, family = "AvertaPE-Black") +
  geom_text(aes(x = Total, label = Total), colour = white, family = "AvertaPE-Black") +
  labs(title = "Title",
       subtitle = "Test",
       y = NULL,
       x = "Count",
       col = "Movies") +
  actor_proj_theme
# Reload the film database from the "Notation" sheet and prepare it for the
# Top 100 poster charts.
CritiqueFilm <- read_excel(
  "/Users/theotimebourgeois/Documents/Documents/Critique Film.xlsx",
  sheet = "Notation"
)

# Saga regrouping: Batman films are filed under DC, and every film distributed
# by DreamWorks Animation is filed under DreamWorks.
is_batman <- CritiqueFilm$Saga %in% "Batman"
CritiqueFilm$Saga[is_batman] <- "DC"
is_dreamworks <- CritiqueFilm$`Maison de distribution` %in% "DreamWorks Animation"
CritiqueFilm$Saga[is_dreamworks] <- "DreamWorks"

# Pastel base colours and the interpolating palette built from them.
# ("#CEAF65" and "#F5DAA1" are deliberately left out of the ramp.)
pastel <- c(
  "#9B553A", "#3F4A4D", "#728989", "#9FB9AC", "#847359",
  "#6D836E", "#455C46", "#E59B97", "#9E6B66", "#513136"
)
colpastel <- colorRampPalette(pastel)

# Split the "rank-year" column into its two parts.
CritiqueFilm <- CritiqueFilm %>%
  separate(col = `Meilleure film/année`,
           into = c("RangAnnée", "Année"),
           sep = "-")
# Top 100 films by rank, re-sorted chronologically; ID is the 1..n position in
# release-date order and is used as the angular position in the poster charts.
Top100 <- CritiqueFilm %>%
  arrange(Rang) %>%
  head(100) %>%
  arrange(`Date de sortie`) %>%
  mutate(ID = 1,
         ID = cumsum(ID))

# World map with the countries of origin of the Top 100 highlighted.
# (The former map_data('world') assignment was overwritten on the very next
# line and has been removed.)
world <- ne_countries(scale = "medium", returnclass = "sf")
is_top100_country <- world$sovereignt %in% Top100$`Pays d'origine`
# `color` holds the country name for highlighted countries and NA otherwise.
world$color[is_top100_country] <- world$sovereignt[is_top100_country]
world <- world[world$sovereignt != "Antarctica", ]

# One palette colour per highlighted country (the NA entry is not counted).
n_map_colours <- length(unique(world$color)) - 1
Map_graph <- ggplot(data = world) +
  geom_sf(aes(fill = color), color = NA) +
  scale_fill_manual(values = colpastel(n_map_colours), na.value = "#CEAF65") +
  theme(panel.background = element_rect(fill = "#E8E8DC"),
        plot.background = element_rect(fill = "#E8E8DC"),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks = element_blank(),
        text = element_blank())

# Ring with one equally sized segment per country of origin. The single column
# produced by as.data.frame() is named "."; it is referenced as `Country$.`.
Country <- Top100$`Pays d'origine` %>%
  unique() %>%
  as.data.frame() %>%
  mutate(Freq = 3)
Country$.[Country$. == "United States of America"] <- "USA"

# The palette used to be fixed at 7 colours, which fails as soon as the
# Top 100 spans more than 7 countries; 7 is kept as the minimum so the colours
# are unchanged for 7 countries or fewer.
n_country_colours <- max(7, nrow(Country))
Country_graph <- ggplot(Country, aes(., Freq)) +
  geom_col(aes(fill = .), position = 'stack', width = 1) +
  scale_fill_manual(values = colpastel(n_country_colours)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "black"),
        axis.title.x = element_blank(),
        panel.grid.major = element_blank()) +
  # The negative lower limit leaves an empty centre in the polar layout.
  scale_y_continuous(limits = c(-20, max(Country$Freq))) +
  coord_curvedpolar()

# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "Map_graph.svg", plot = Map_graph, width = 10, height = 8)
ggsave(filename = "MCountry_graph.png", plot = Country_graph, width = 10, height = 8)
# For every Top 100 film, the country mix of all grade-"A" films from the same
# year, drawn as a 100 % stacked ring at the film's position.
Année <- CritiqueFilm %>%
  select(Année, `Pays d'origine`, Grade) %>%
  filter(Grade == "A", `Pays d'origine` %in% (c(Country$., "United States of America"))) %>%
  arrange(Année) %>%
  mutate(ID = 1,
         ID = cumsum(ID))

# One slice of `Année` per Top 100 film, tagged with the film's position.
# The slices are built in a list and bound once instead of growing the table
# with rbind() inside a loop; seq_len() also copes with fewer than two films.
AnnéeTable <- lapply(seq_len(nrow(Top100)), function(film_pos) {
  Année %>%
    filter(Année == Top100$Année[film_pos]) %>%
    mutate(ID = film_pos)
}) %>%
  bind_rows()

# The palette used to be fixed at 7 colours; 7 is kept as the minimum so the
# colours are unchanged, and more countries no longer cause an error.
n_année_colours <- max(7, n_distinct(AnnéeTable$`Pays d'origine`))
AnnéeGraph <- ggplot(AnnéeTable) +
  geom_bar(aes(x = ID, fill = `Pays d'origine`), position = "fill") +
  scale_fill_manual(values = colpastel(n_année_colours)) +
  coord_polar() +
  scale_x_continuous(limits = c(-4, 104)) +
  # The negative lower limit pushes the ring outwards, leaving the centre empty.
  scale_y_continuous(limits = c(-7, 1)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.major = element_blank())
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "AnnéeGraph.svg", plot = AnnéeGraph, width = 10, height = 8)
# Ring marking, for each Top 100 film, the grading categories in which it got
# the maximum grade of 5 (one concentric track per category).
Category <- Top100 %>%
  select(`English Title`, ID, Scénario, `Acteurs / Personnages`,
         `Ambiance / Concept`, `Aspect Visuel`, `Aspect Sonore`) %>%
  pivot_longer(cols = c(Scénario, `Acteurs / Personnages`, `Ambiance / Concept`,
                        `Aspect Visuel`, `Aspect Sonore`),
               names_to = "Category",
               values_to = "Grade")

# Lookup table giving each category a numeric track (1, 2, ...) in order of
# first appearance.
category_names <- unique(Category$Category)
category_tracks <- data.frame(Category = category_names,
                              CategoryID = as.numeric(seq_along(category_names)))

Category <- merge(Category, category_tracks, by = "Category") %>%
  filter(Grade == 5)

category_theme <- theme(
  panel.border = element_blank(),
  panel.background = element_rect(fill = "transparent"),
  legend.position = "none",
  axis.ticks = element_blank(),
  axis.text.y = element_blank(),
  axis.title.y = element_blank(),
  axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
  axis.title.x = element_blank(),
  panel.grid.major = element_blank()
)

Category_graph <- ggplot() +
  geom_bin2d(data = Category,
             aes(x = ID, y = CategoryID, fill = Category),
             binwidth = c(1, 1)) +
  coord_polar() +
  xlim(c(-4, 104)) +
  # The negative lower limit leaves an empty centre in the polar layout.
  ylim(c(-20, 6)) +
  scale_fill_manual(values = colpastel(5)) +
  category_theme
ggsave(filename = "Category_graph.svg", plot = Category_graph, width = 10, height = 8)
# Ring with one cell per Top 100 film, coloured by decade; the decade is
# written on the first cell of every decade that has at least three films.
# The per-group running count is computed with a grouped mutate(): returning
# several rows per group from summarise() is deprecated in dplyr 1.1. The
# arrange()/select() steps reproduce the row and column order summarise() gave.
Décénie <- Top100 %>%
  select(Décénie) %>%
  mutate(Freq = 1,
         Décénie = Décénie %>% as.character()) %>%
  group_by(Décénie) %>%
  mutate(label = cumsum(Freq),   # running position of the film in its decade
         Count = max(label)) %>% # number of films in the decade
  ungroup() %>%
  arrange(Décénie) %>%
  select(Décénie, label, Count) %>%
  mutate(Freq = 1,
         ID = cumsum(Freq),
         # Blank the label for small decades, then keep it only on the first
         # film of each remaining decade.
         label = ifelse(Count < 3, NA, label),
         label = ifelse(label == 1, Décénie, NA))

# The palette used to be fixed at 7 colours; 7 is kept as the minimum so the
# colours are unchanged, and more decades no longer cause an error.
n_decade_colours <- max(7, n_distinct(Décénie$Décénie))
Décéniegraph <- ggplot(data = Décénie, aes(ID, Freq)) +
  geom_col(aes(fill = Décénie), position = 'stack', width = 1.1) +
  scale_fill_manual(values = colpastel(n_decade_colours)) +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  xlim(c(-4, 104)) +
  # The negative lower limit leaves an empty centre in the polar layout.
  scale_y_continuous(limits = c(-15, max(Décénie$Freq))) +
  geom_textpath(aes(x = ID, y = Freq, label = label),
                vjust = -0.8, hjust = 1, color = "white", size = 3,
                inherit.aes = FALSE) +
  coord_polar()
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "Décéniegraph.svg", plot = Décéniegraph, width = 10, height = 8)
## Warning: position_stack requires non-overlapping x intervals
## Warning: Removed 95 rows containing missing values (geom_textpath).
# Radial list of the Top 100 titles: one spoke per film, text rotated to follow
# the circle and flipped where the rotation goes below -90 degrees.
angle <- 77 - 333 * (Top100$ID) / 100
is_flipped <- angle < -90

Top100$hjust <- ifelse(is_flipped, 1, 0)
Top100$angle <- ifelse(is_flipped, angle + 180, angle)
# Flipped labels put the year after the title, the others put it first.
title_then_year <- paste0(Top100$`English Title`, " -", Top100$Année)
year_then_title <- paste0(Top100$Année, "- ", Top100$`English Title`)
Top100$label <- ifelse(is_flipped, title_then_year, year_then_title)
Top100$Décénie <- as.character(Top100$Décénie)

# Common bar height for every film (also the upper limit of the y axis).
y <- 6

title_theme <- theme(
  panel.border = element_blank(),
  panel.background = element_rect(fill = "transparent"),
  legend.position = "none",
  axis.ticks = element_blank(),
  axis.text.y = element_blank(),
  axis.title.y = element_blank(),
  axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
  axis.title.x = element_blank(),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank()
)

Titlegraph <- ggplot(Top100, aes(x = ID, y = y)) +
  geom_col(aes(fill = Décénie), position = 'stack', width = 1.1, alpha = .3) +
  geom_text(aes(label = label, y = 0.2, hjust = hjust, angle = angle), size = 1) +
  xlim(c(-4, 104)) +
  # The negative lower limit leaves an empty centre in the polar layout.
  scale_y_continuous(limits = c(-10, y)) +
  scale_fill_manual(values = colpastel(7)) +
  coord_polar() +
  title_theme
ggsave(filename = "Titlegraph.svg", plot = Titlegraph, width = 10, height = 8)
## Warning: position_stack requires non-overlapping x intervals
# Radial list of the Top 100 directors; the (up to) ten most frequent directors
# get a colour key, all others have no key.
Réal <- Top100$Réalisateur %>%
  table() %>%
  as.data.frame() %>%
  arrange(desc(Freq)) %>%
  head(10) %>%
  `colnames<-`(c("Réalisateur", "Freq"))
# One colour per retained director: a fixed colpastel(10) fails with a length
# mismatch when the list holds fewer than ten distinct directors.
Réal$Col <- colpastel(nrow(Réal))
# Full outer join, so films by other directors are kept (with NA in Col).
Top100 <- merge(Top100, Réal, by = "Réalisateur", all = TRUE)

# NOTE(review): Col is mapped as a discrete variable and recoloured by
# scale_color_manual(), so the hex codes stored in Col are not the colours
# actually drawn - confirm whether scale_color_identity() was intended.
Réalgraph <- ggplot(Top100, aes(x = ID, y = y)) +
  geom_text(aes(label = Réalisateur, y = 0.2, hjust = hjust, angle = angle, col = Col), size = 1) +
  xlim(c(-4, 104)) +
  # The negative lower limit leaves an empty centre in the polar layout.
  scale_y_continuous(limits = c(-10, y)) +
  scale_color_manual(values = colpastel(11)) +
  coord_polar() +
  theme(panel.border = element_blank(),
        panel.background = element_rect(fill = "transparent"),
        legend.position = "none",
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 15, vjust = 4, color = "white"),
        axis.title.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank())
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "Réalgraph.svg", plot = Réalgraph, width = 10, height = 8)
# Top100 <- merge(Top100,
# world %>% as.data.frame() %>% select(sovereignt,iso_a2) %>% unique() %>% `colnames<-`(c("Pays d'origine","iso")),
# by="Pays d'origine")