#Source: https://t.me/rstudioprogr
library(readxl)
library(dplyr)
## 
## Присоединяю пакет: 'dplyr'
## Следующие объекты скрыты от 'package:stats':
## 
##     filter, lag
## Следующие объекты скрыты от 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: пакет 'ggplot2' был собран под R версии 4.3.3
#data here: https://docs.google.com/spreadsheets/d/1FC4mR5P3cjh4HSWABkNJwskNDJ3L3PnJ/edit?usp=sharing&ouid=103177799370390773118&rtpof=true&sd=true

data <- read_excel("TTF ($ to kcm) and EU storage levels (TWh).xlsx") %>% as.data.frame()
head(data)
##         Date Month Year EU_Storage    TTF
## 1 2025-04-25     4 2025   430.6651 32.225
## 2 2025-04-24     4 2025   428.1775 33.850
## 3 2025-04-23     4 2025   426.1178 34.225
## 4 2025-04-22     4 2025   423.9151 34.300
## 5 2025-04-17     4 2025   409.4195 35.200
## 6 2025-04-16     4 2025   407.7433 35.500
average_5Y_2022 <- data %>% dplyr::filter(Year %in% c("2017", "2018", "2019", "2020", "2021")) %>% group_by(Month) %>% 
  summarise(average_2022_EU_Storage = mean(EU_Storage), Year = "2022")
average_5Y_2023 <- data %>% dplyr::filter(Year %in% c("2018", "2019", "2020", "2021", "2022")) %>% group_by(Month) %>% 
  summarise(average_2023_EU_Storage = mean(EU_Storage), Year = "2023")
average_5Y_2024 <- data %>% dplyr::filter(Year %in% c("2019", "2020", "2021", "2022", "2023")) %>% group_by(Month) %>% 
  summarise(average_2024_EU_Storage = mean(EU_Storage), Year = "2024")
average_5Y_2025 <- data %>% dplyr::filter(Year %in% c("2024", "2020", "2021", "2022", "2023")) %>% group_by(Month) %>% 
  summarise(average_2025_EU_Storage = mean(EU_Storage), Year = "2025")

average_data <- data %>% dplyr::filter(Year %in% c("2022", "2023", "2024", "2025")) %>% group_by(Year, Month) %>% summarise(EU_Storage = mean(EU_Storage), 
                                                             TTF = mean(TTF))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
head(average_data)
## # A tibble: 6 × 4
## # Groups:   Year [1]
##    Year Month EU_Storage   TTF
##   <dbl> <dbl>      <dbl> <dbl>
## 1  2022     1       514.  84.3
## 2  2022     2       366.  81.0
## 3  2022     3       294. 130. 
## 4  2022     4       322. 102. 
## 5  2022     5       446.  89.7
## 6  2022     6       591. 107.
Year_2022 <- merge(average_data, average_5Y_2022)
Year_2023 <- merge(average_data, average_5Y_2023)
Year_2024 <- merge(average_data, average_5Y_2024)
Year_2025 <- merge(average_data, average_5Y_2025)

colnames(Year_2022)[5] <- c("average_5Y_EUStorage")
colnames(Year_2023)[5] <- c("average_5Y_EUStorage")
colnames(Year_2024)[5] <- c("average_5Y_EUStorage")
colnames(Year_2025)[5] <- c("average_5Y_EUStorage")

final_dataset <- rbind(Year_2022, Year_2023, Year_2024, Year_2025)
final_dataset$EU_Storage1 <- final_dataset$EU_Storage - final_dataset$average_5Y_EUStorage
final_dataset$Month <- as.numeric(final_dataset$Month)

ggplot(data = final_dataset, aes(x = EU_Storage1, y = TTF, color = Year, group = Year)) + 
  geom_point(size = 3) + 
  geom_line(linewidth = 1) +
  labs(title = "Зависимость цены TTF от уровня заполнения хранилищ ЕС",
       subtitle = "Отклонение от среднего уровня за предыдущие 5 лет",
       x = "Отклонение уровня заполнения хранилищ ЕС (TWh)",
       y = "Цена TTF ($/kcm)",
       color = "Год") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        panel.grid.major = element_line(color = "gray90"),
        panel.grid.minor = element_blank()) +
  guides(color = guide_legend(nrow = 1))