### Projet collaboratif
### Visualisation
### Hanjoon

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readr)
# Load the train regularity records (one row per liaison and month) from the
# CSV export into a tibble.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact location exists -- confirm before sharing.
Trains <- read_csv(
  file = "C:/Users/Ko/OneDrive/Nanterre/S2/Projet collaboratif/Regularities_by_liaisons_Trains_France.csv"
)
## Rows: 7806 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Departure station, Arrival station, Comment (optional) delays on a...
## dbl (27): Year, Month, Average travel time (min), Number of expected circula...
## lgl  (1): Comment (optional) delays at departure
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Bibliothèques ajoutées
library(gifski)
library(gganimate)

# Drop the atypical record(s): keep only rows whose average travel time is
# below 700 minutes.
Trains <- Trains %>%
  filter(`Average travel time (min)` < 700)

# Liaisons departing from one of the four Paris stations listed below.
# Built before the derived columns are added, so it carries only the columns
# read from the CSV.
depart_paris <- Trains %>%
  filter(
    `Departure station` %in%
      c("PARIS MONTPARNASSE", "PARIS EST", "PARIS NORD", "PARIS LYON")
  )

# Derived columns:
# - number_trains: expected circulations minus cancelled trains.
# - prct_retard_depart: trains late at departure divided by number_trains,
#   stored as a proportion (NOT multiplied by 100). When number_trains is not
#   strictly positive the ratio is undefined, so it is set to NA instead of
#   the Inf/NaN a division by zero would produce.
Trains <- Trains %>%
  mutate(
    number_trains = `Number of expected circulations` -
      `Number of cancelled trains`,
    prct_retard_depart = if_else(
      number_trains > 0,
      `Number of late trains at departure` / number_trains,
      NA_real_
    )
  )



## 1. Y a-t-il une relation entre la durée moyenne du trajet et la proportion de retard au départ ? -------------------------------------

# Trains$Year <- as.character(Trains$Year)

# Scatter plot of the average travel time against the share of trains that
# were late at departure, coloured by year.
plot_duree_prct <- ggplot(Trains) +
  aes(
    x = `Average travel time (min)`,
    # prct_retard_depart is a proportion; scale it by 100 so the plotted
    # values match the "(%)" unit announced on the axis label.
    y = 100 * prct_retard_depart,
    colour = Year
  ) +
  geom_point(shape = "circle", size = 1.5) +
  labs(
    title = "Relation entre la durée moyenne de trajet et le pourcentage du retard au départ",
    subtitle = "De 2015 à 2020",
    caption = "Université Paris Nanterre DES 22/23
      Projet collaboratif",
    colour = "Année",
    x = "Durée moyenne de trajet (min)",
    y = "Pourcentage du retard au départ (%)"
  ) +
  theme_minimal()

# Animate the plot over Year, with cubic-in-out easing between frames.
viz1 <- plot_duree_prct +
  transition_time(Year) +
  ease_aes("cubic-in-out")

# Render once, keep the rendered object, display it, and save that exact
# object instead of relying on whatever last_animation() currently holds.
anim_viz1 <- animate(viz1, height = 600, width = 800)
anim_viz1
anim_save("viz1-hanjoon.gif", animation = anim_viz1)


## Mais, y a-t-il une différence entre les gares de Paris ?

# Bar chart comparing the Paris departure stations.
# mean_dep is the plain (unweighted) mean, per station, of the
# "Average delay of late departing trains (min)" column, ignoring NAs.
plot_depart_paris <- depart_paris %>%
  group_by(`Departure station`) %>%
  summarise(
    mean_dep = mean(
      `Average delay of late departing trains (min)`,
      na.rm = TRUE
    )
  ) %>%
  ggplot() +
  aes(
    x = `Departure station`,
    y = mean_dep,
    fill = `Departure station`,
    label = round(mean_dep, 1)
  ) +
  labs(
    title = "Différence du retard moyen au départ de chaque gare de Paris",
    subtitle = "De 2015 à 2020",
    caption = "Université Paris Nanterre DES 22/23
       Projet collaboratif",
    x = "Gare de départ",
    y = "Retard moyen (min)",
    fill = "Gare de départ"
  ) +
  geom_col() +
  geom_text(vjust = -0.5)

# Reveal the stations one state at a time; shadow_mark() keeps the bars of the
# previous states visible.
viz2 <- plot_depart_paris +
  transition_states(`Departure station`, wrap = FALSE) +
  shadow_mark()

# Render explicitly: a bare top-level expression is only rendered when R
# auto-prints it, which does not happen under source(). In that case
# anim_save() would fall back to last_animation() and write the previous
# animation into this file.
anim_viz2 <- animate(viz2)
anim_viz2
anim_save("viz2-hanjoon.gif", animation = anim_viz2)