Hydrologie I - Popisná statistika

Code
lop <- c("data.table", "ggplot2")

to_instal <- lop[which(x = !(lop %in% installed.packages()[,"Package"]))]

if(length(to_instal) != 0) {
  
  install.packages(to_instal)
}

temp <- lapply(X = lop, 
               FUN = library, 
               character.only = T)
rm(temp)

## nezapomente zmenit cestu k Vasemu souboru s daty!!!

dta <- fread(input = "~/Desktop/docs/vyuka/Hydro_I/prez/6140700_Q_Day.Cmd.txt", 
             select = c(1,3),
             col.names = c("date", "value"))

str(object = dta)
Classes 'data.table' and 'data.frame':  40969 obs. of  2 variables:
 $ date : IDate, format: "1906-11-01" "1906-11-02" ...
 $ value: num  2.9 3.26 2.9 2.74 2.74 2.9 2.74 3.26 3.8 3.8 ...
 - attr(*, ".internal.selfref")=<externalptr> 
Code
summary(object = dta)
      date                value        
 Min.   :1906-11-01   Min.   :  0.200  
 1st Qu.:1934-11-16   1st Qu.:  1.380  
 Median :1962-12-01   Median :  2.280  
 Mean   :1962-12-01   Mean   :  3.717  
 3rd Qu.:1990-12-16   3rd Qu.:  4.190  
 Max.   :2018-12-31   Max.   :106.000  
Code
ggplot(data = dta,
       mapping = aes(x = date,
                     y = value)) +
  geom_line(colour = "royalblue4") +
  theme_bw() +
  labs(x = "Čas",
       y = expression(Průtok~(m^{3} %.% s^{"-1"})),
       title = "Divoká Orlice")

Code
rok <- 1983

dta_rok <- dta[between(x = date,
                       lower = as.IDate(x = paste0(rok - 1, 
                                                   "-11-01")),
                       upper = as.IDate(x = paste0(rok, 
                                                   "-10-31")), 
                       incbounds = TRUE), ]
dta_rok[, `:=`(tyden = frollmean(x = value,
                                 n = 7, 
                                 align = "center"),
               mesic = frollmean(x = value,
                                 n = 30, 
                                 align = "center"))]
dta_rok_m <- melt(data = dta_rok,
                  id.vars = 1)

ggplot(data = dta_rok,
       mapping = aes(x = date,
                     y = value)) +
  geom_line(colour = "royalblue4") +
  theme_bw() +
  labs(x = "Čas",
       y = expression(Průtok~(m^{3} %.% s^{"-1"})),
       title = paste0("Divoká Orlice - hydrologický rok ", rok))

Code
ggplot(data = dta_rok_m,
       mapping = aes(x = date,
                     y = value, 
                     colour = variable)) +
  geom_line(na.rm = TRUE) +
  scale_colour_manual(values = c("royalblue4",
                                 "red4",
                                 "olivedrab4"),
                      labels = c("Den",
                                 "Týden",
                                 "Měsíc"),
                      name = "Časový krok") +
  theme_bw() +
  labs(x = "Čas",
       y = expression(Průtok~(m^{3} %.% s^{"-1"})),
       title = paste0("Divoká Orlice - hydrologický rok ", rok)) +
  theme(legend.position = c(.9, 0.5),
        legend.background = element_blank())

Code
ggplot(data = dta_rok,
       mapping = aes(x = value)) +
  geom_histogram(binwidth = 1,
                 mapping = aes(fill = "Četnost"),
                 colour = "grey75",
                 show.legend = FALSE) +
  scale_fill_manual(values = "royalblue4",
                    name = NULL) +
  theme_bw() +
  labs(y = "Četnost výskytu",
       x = expression(Průtok~(m^{3} %.% s^{"-1"})),
       title = "Histogram") +
  theme(legend.position = c(.9, 0.5),
        legend.background = element_blank())

Code
ggplot(data = dta_rok,
       mapping = aes(x = value,
                     y = after_stat(x = density))) +
  geom_histogram(binwidth = 1,
                 mapping = aes(fill = "Relativní\nčetnost"),
                 colour = "grey75",
                 show.legend = FALSE) +
  scale_fill_manual(values = "royalblue4",
                    name = NULL) +
  stat_density(trim = TRUE,
               mapping = aes(colour = "Hustota\npravděpodobností"),
               geom = "line") +
  scale_colour_manual(values = "red4", 
                      name = NULL) +
  theme_bw() +
  labs(y = "Relativní četnost výskytu",
       x = expression(Průtok~(m^{3} %.% s^{"-1"})),
       title = "Histogram") +
  theme(legend.position = c(.9, 0.5),
        legend.background = element_blank())

Code
ggplot(data = dta_rok) +
  stat_ecdf(mapping = aes(x = value,
                          colour = "Distribuční funkce\nP(x ≥ X)"), 
            geom = "line") +
  stat_ecdf(mapping = aes(x = value,
                          y = 1 - ..y..,
                          colour = "Čára překročení\nP(x < X)"),
            geom = "line") +
  scale_colour_manual(values = c("red4", "royalblue4"),
                      name = NULL) +
  theme_bw() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = "p",
       x = expression(Průtok~(m^{3} %.% s^{"-1"})),
       title = "Empirická distribuční funkce a čára překročení") +
  theme(legend.position = c(.9, 0.5),
        legend.background = element_blank())

Code
regr <- data.table(x = c(NA, dta_rok$value),
                   y = c(dta_rok$value, NA))
fit <- lm(formula = y ~ x,
          data = regr)
eq <- substitute(italic(y) == a %.% italic(x) + b*";  "~~italic(r)^2~"="~r2, 
                 list(b = format(x = unname(obj = coef(fit)[1]), 
                                 digits = 2),
                      a = format(x = unname(obj = coef(fit)[2]), 
                                 digits = 2),
                      r2 = format(x = summary(object = fit)$r.squared, 
                                  digits = 3)))
eq <- as.character(x = as.expression(x = eq))

ggplot()+
  geom_point(data = regr, 
             mapping = aes(x = x, 
                           y = y),
             colour = "royalblue4") +
  geom_line(data = fortify(model = fit), 
            aes(x = x, 
                y = .fitted), 
            colour = "red4") + 
  labs(x = expression(Průtok~(m^{3} %.% s^{"-1"})~v~čase~t-1),
       y = expression(Průtok~(m^{3} %.% s^{"-1"})~v~čase~t),
       title = "Autoregrese denních dat") +
  geom_text(data = NULL,
            mapping = aes(x = quantile(x = regr, 
                                       probs = .85, 
                                       na.rm = TRUE),
                          y = quantile(x = regr, 
                                       probs = .995, 
                                       na.rm = TRUE),
                          label = eq),
            # size = 3,
            colour = "grey15", 
            parse = TRUE) +
  theme_bw()

Code
acf_e <- acf(x = dta_rok$value,
             plot = FALSE)
acf_df <- with(data = acf_e, 
               expr = data.frame(lag, acf))

ggplot(data = acf_df, 
       mapping = aes(x = lag, 
                     y = acf)) +
  geom_hline(yintercept = 0, 
             colour = "grey15") +
  geom_hline(yintercept = c(-.1, .1), 
             colour = "red4",
             linetype = "dashed") +
  geom_segment(mapping = aes(xend = lag, 
                             yend = 0), 
               colour = "royalblue4") +
  labs(x = "Lag (časový krok)",
       y = "Korelační koeficient",
       title = "Autokorelační funkce") +
  theme_bw()