---
title: "Priprava_skuska"
author: "Mikhail Kharlamau"
date: "2025-01-14"
output: html_document
---

library(datasets) 
library(moments)
library(vioplot)
library(agricolae)

#1).

  # data1 <- datasets::airquality
  # 
  # #a:
  #   max1 <- sapply(data1, max, na.rm = TRUE) 
  #   max1
  #   max1name <- names(which.max(max1)) 
  #   max1name
  #   
  # #b:
  #   missing_vars <- names(data1)[sapply(data1, function(x) any(is.na(x)))]
  #   missing_vars 
  #   data1_clean <- na.omit(data1)
  #   
  # #c:
  #   skewness_values <- sapply(data1[, sapply(data1, is.numeric)], skewness)
  #   max_skew_var <- names(which.max(skewness_values))
  #   max_skew_var
  # 
  # #d:
  #   hist(data1[[max_skew_var]], main = paste("Histogram:", max_skew_var), xlab = max_skew_var)
  #   boxplot(data1[max_skew_var], main = paste("Boxplot:", max_skew_var))
  #   vioplot(data1[max_skew_var], 
  #       names = max_skew_var, 
  #       main = paste("Violinplot:", max_skew_var))
#e

Vieme to urcit podla grafov:

Histogram: Zošikmenie (skewness) premennej je viditelne podla asymetrie rozdelenia: Napr. ak ma histogram dlhy chvost vpravo (pozitivne zosikmenie), vacsina hodnot je koncentrovana vlavo.

Boxplot: Na boxplote je zosikmenie viditelne z pozicie medianu a dlzky fuzov: Dlhsi fuz na jednej strane naznacuje smer zosikmenia.

Violinplot: Tento graf zobrazuje hustotu rozdelenia. Podobne ako v boxplote, ak je jeden z chvostov dlhsi, naznacuje to zosikmenie.

#2).

  # data2 <- datasets::ToothGrowth
  # 
  # #a
  #   shapiro_results <- sapply(data2, function(x) if (is.numeric(x)) shapiro.test(x)$p.value else 0)
  #   normal_vars <- names(shapiro_results[shapiro_results > 0.1])
  #   normal_vars
  #   
  # #b
  #   t.test(data2[normal_vars[1]], mu = 20)
  # 
  # #c
  #   t.test(data2[[normal_vars[1]]] ~ data2$supp, var.equal = TRUE)
  #   t.test(data2[[normal_vars[1]]] ~ data2$supp, var.equal = TRUE, conf.level = 0.9)

#3)

  # data3 <- datasets::ChickWeight
  # 
  # #a
  #   tapply(data3$weight, data3$Diet, shapiro.test)
  #   bartlett.test(data3$weight, data3$Diet)
  #   
  # #b
  #   anova_model <- aov(weight ~ Diet, data = data3)
  #   summary(anova_model)  
  #   
  # #c
  #   TukeyHSD(anova_model)
  #   
  #   an3<-aov(weight ~ Diet, data=data3)
  #   result<- scheffe.test(an3,"Diet", alpha = 0.05)
  #   result
Vysledok Tukey: Ak je p-hodnota mensia nez 0,05, tak medzi tymito dvoma urovnami faktora je statisticky vyznamny rozdiel. Vysledky testov poskytnu konkrétne pary, medzi ktorymi existuje vyznamny rozdiel v zavislosti od hodnoty p.

#4)

  # data4 <- data1_clean 
  # 
  # #a
  #   cor_matrix <- cor(data4)
  #   ozone_cor <- cor_matrix["Ozone",]
  #   ozone_cor
  #   strngst_pos <- names(which.max(ozone_cor[ozone_cor<1]))
  #   strngst_neg <- names(which.min(ozone_cor))
  #   strngst_pos
  #   strngst_neg
  #   
  # #b
  #   spearman_cor <- cor(data4,method = "spearman")
  #   kendall_cor <- cor(data4,method = "kendall")
  # 
  #   spearman_cor["Ozone",]
  #   kendall_cor["Ozone",]
  #   
  # #c
  #   plot(data4$Ozone, data4[[strngst_pos]], main = paste("Ozone vs", strngst_pos))
  #   plot(data4$Ozone, data4[[strngst_neg]], main = paste("Ozone vs", strngst_neg))

#5)

  # data5 <- datasets::ChickWeight
  # 
  # #a
  # linear_model <- lm(weight ~ Time + Diet, data = data5) 
  # summary(linear_model)$adj.r.squared
  # summary(linear_model) 
  # 
  # #b
  # full_model <- lm(weight ~ . , data = data5) 
  # summary(full_model)$adj.r.squared 
  # summary(full_model)
  # 
  # #c
  # AIC(linear_model, full_model)
  # BIC(linear_model, full_model)
  # 
  # #d
  # best_model <- if (AIC(linear_model) < AIC(full_model)) {
  # linear_model
  # } else {
  # full_model
  # }
  # 
  #   library(randtests)
  #   residuals_test <- residuals(best_model)
  #   shapiro.test(residuals_test) 
  #   runs.test(residuals_test)