Preparing Environment

library(data.table) 
library(ggplot2) 

# Load the previously saved runoff tables from the project's data directory.

# Station-level summaries.
runoff_summary <- readRDS("./data/runoff_summary.rds")
runoff_stats <- readRDS("./data/runoff_stats.rds")

# Runoff series at the different aggregation levels used in the questions.
runoff_day <- readRDS("./data/runoff_day.rds")
runoff_month <- readRDS("./data/runoff_month.rds")
runoff_summer <- readRDS("./data/runoff_summer.rds")
runoff_winter <- readRDS("./data/runoff_winter.rds")
runoff_year <- readRDS("./data/runoff_year.rds")

Explorer

Question 1

  1. What is the difference between the median and the 0.5 quantile?

There is no difference. It is the same thing.

Question 2

  1. Why is the median of runoff slightly lower than the mean at each station?

Because the data are positively skewed. If there are some very high values (outliers), they increase the mean but barely affect the median. Such a distribution is sometimes said to have a “fat” right tail.

Question 3

  1. Do you notice something strange regarding the location of the stations LOBI and REES? Can you think of a possible explanation?

They are two stations very close to each other, where the runoff is at its maximum as the Rhine approaches the sea. Although LOBI is downstream of REES, it is at a higher altitude. This is probably not an error in the data, as the difference is very small. The recorded altitude probably refers to the exact location of the station rather than to the point where runoff is measured, which cannot be higher than the measurement point of the upstream station.

Question 4

  1. Which were the months, seasons, years with the highest/lowest runoff at each location? Try to present them in a comprehensive way. Feel free to improvise!
# Summer extremes: for every station, find the record(s) with the lowest and
# the highest summer runoff and plot them, labelling each point with its year.

# Per-station minimum and maximum in a single grouped pass. `keyby` sorts the
# result by station and keys it, matching what merging two separate
# aggregations on `sname` would produce.
runoff_summer_to_merge <- runoff_summer[, .(min = min(value), max = max(value)),
                                        keyby = sname]

# Attach the station extremes to every record so that each row can be
# compared against its own station's minimum and maximum.
runoff_summer_minmax <- merge(runoff_summer, runoff_summer_to_merge,
                              by = "sname")

# Keep only the records that attain the extreme; ties are all retained.
runoff_summer_max_final <- runoff_summer_minmax[value == max]
runoff_summer_min_final <- runoff_summer_minmax[value == min]

# The label aesthetic is declared once in ggplot() and inherited by geom_text().
ggplot(runoff_summer_max_final, aes(x = sname, y = max, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Summer: year of highest runoff at each station",
       x = "Station", y = "Maximum summer runoff")

ggplot(runoff_summer_min_final, aes(x = sname, y = min, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Summer: year of lowest runoff at each station",
       x = "Station", y = "Minimum summer runoff")

# Winter extremes: for every station, find the record(s) with the lowest and
# the highest winter runoff and plot them, labelling each point with its year.

# Per-station minimum and maximum in a single grouped pass. `keyby` sorts the
# result by station and keys it, matching what merging two separate
# aggregations on `sname` would produce.
runoff_w_to_merge <- runoff_winter[, .(min = min(value), max = max(value)),
                                   keyby = sname]

# Attach the station extremes to every record so that each row can be
# compared against its own station's minimum and maximum.
runoff_w_minmax <- merge(runoff_winter, runoff_w_to_merge, by = "sname")

# Keep only the records that attain the extreme; ties are all retained.
runoff_w_max_final <- runoff_w_minmax[value == max]
runoff_w_min_final <- runoff_w_minmax[value == min]

# The label aesthetic is declared once in ggplot() and inherited by geom_text().
ggplot(runoff_w_max_final, aes(x = sname, y = max, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Winter: year of highest runoff at each station",
       x = "Station", y = "Maximum winter runoff")

ggplot(runoff_w_min_final, aes(x = sname, y = min, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Winter: year of lowest runoff at each station",
       x = "Station", y = "Minimum winter runoff")

# Monthly extremes: for every station, find the record(s) with the lowest and
# the highest monthly runoff and plot them, labelling each point with its year.
# NOTE(review): the points are labelled with the year only. If runoff_month
# also carries a month column, adding it to the label would answer "which
# month" directly -- confirm the column name before changing the label.

# Per-station minimum and maximum in a single grouped pass. `keyby` sorts the
# result by station and keys it, matching what merging two separate
# aggregations on `sname` would produce.
runoff_m_to_merge <- runoff_month[, .(min = min(value), max = max(value)),
                                  keyby = sname]

# Attach the station extremes to every record so that each row can be
# compared against its own station's minimum and maximum.
runoff_m_minmax <- merge(runoff_month, runoff_m_to_merge, by = "sname")

# Keep only the records that attain the extreme; ties are all retained.
runoff_m_max_final <- runoff_m_minmax[value == max]
runoff_m_min_final <- runoff_m_minmax[value == min]

# The label aesthetic is declared once in ggplot() and inherited by geom_text().
ggplot(runoff_m_max_final, aes(x = sname, y = max, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Monthly: year of highest monthly runoff at each station",
       x = "Station", y = "Maximum monthly runoff")

ggplot(runoff_m_min_final, aes(x = sname, y = min, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Monthly: year of lowest monthly runoff at each station",
       x = "Station", y = "Minimum monthly runoff")

# Annual extremes: for every station, find the record(s) with the lowest and
# the highest annual runoff and plot them, labelling each point with its year.

# Per-station minimum and maximum in a single grouped pass. `keyby` sorts the
# result by station and keys it, matching what merging two separate
# aggregations on `sname` would produce.
runoff_y_to_merge <- runoff_year[, .(min = min(value), max = max(value)),
                                 keyby = sname]

# Attach the station extremes to every record so that each row can be
# compared against its own station's minimum and maximum.
runoff_y_minmax <- merge(runoff_year, runoff_y_to_merge, by = "sname")

# Keep only the records that attain the extreme; ties are all retained.
runoff_y_max_final <- runoff_y_minmax[value == max]
runoff_y_min_final <- runoff_y_minmax[value == min]

# The label aesthetic is declared once in ggplot() and inherited by geom_text().
ggplot(runoff_y_max_final, aes(x = sname, y = max, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Annual: year of highest runoff at each station",
       x = "Station", y = "Maximum annual runoff")

ggplot(runoff_y_min_final, aes(x = sname, y = min, label = year)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0) +
  labs(title = "Annual: year of lowest runoff at each station",
       x = "Station", y = "Minimum annual runoff")

Question 5

  1. (Optional) What is the average distance between adjacent stations in km? Which are the two closest and the two farthest adjacent stations?