---
title: "Priprava_skuska"
author: "Mikhail Kharlamau"
date: "2025-01-14"
output: html_document
---
library(datasets)
library(moments)
library(vioplot)
library(agricolae)
#1).
# data1 <- datasets::airquality
#
# #a:
# max1 <- sapply(data1, max, na.rm = TRUE)
# max1
# max1name <- names(which.max(max1))
# max1name
#
# #b:
# missing_vars <- names(data1)[sapply(data1, function(x) any(is.na(x)))]
# missing_vars
# data1_clean <- na.omit(data1)
#
# #c:
# skewness_values <- sapply(data1[, sapply(data1, is.numeric)], skewness)
# max_skew_var <- names(which.max(skewness_values))
# max_skew_var
#
# #d:
# hist(data1[[max_skew_var]], main = paste("Histogram:", max_skew_var), xlab = max_skew_var)
# boxplot(data1[max_skew_var], main = paste("Boxplot:", max_skew_var))
# vioplot(data1[max_skew_var],
# names = max_skew_var,
# main = paste("Violinplot:", max_skew_var))
#e
Vieme to určiť podľa grafov:
Histogram: Zošikmenie (skewness) premennej je viditeľné podľa asymetrie rozdelenia: napr. ak má histogram dlhý chvost vpravo (pozitívne zošikmenie), väčšina hodnôt je koncentrovaná vľavo.
Boxplot: Na boxplote je zošikmenie viditeľné z pozície mediánu a dĺžky fúzov: dlhší fúz na jednej strane naznačuje smer zošikmenia.
Violinplot: Tento graf zobrazuje hustotu rozdelenia. Podobne ako pri boxplote platí, že ak je jeden z chvostov dlhší, naznačuje to zošikmenie.
#2).
# data2 <- datasets::ToothGrowth
#
# #a
# shapiro_results <- sapply(data2, function(x) if (is.numeric(x)) shapiro.test(x)$p.value else 0)
# normal_vars <- names(shapiro_results[shapiro_results > 0.1])
# normal_vars
#
# #b
# t.test(data2[normal_vars[1]], mu = 20)
#
# #c
# t.test(data2[[normal_vars[1]]] ~ data2$supp, var.equal = TRUE)
# t.test(data2[[normal_vars[1]]] ~ data2$supp, var.equal = TRUE, conf.level = 0.9)
#3)
# data3 <- datasets::ChickWeight
#
# #a
# tapply(data3$weight, data3$Diet, shapiro.test)
# bartlett.test(data3$weight, data3$Diet)
#
# #b
# anova_model <- aov(weight ~ Diet, data = data3)
# summary(anova_model)
#
# #c
# TukeyHSD(anova_model)
#
# an3<-aov(weight ~ Diet, data=data3)
# result<- scheffe.test(an3,"Diet", alpha = 0.05)
# result
Výsledok Tukeyho testu: Ak je upravená p-hodnota (p adj) menšia než 0,05, tak medzi týmito dvoma úrovňami faktora je štatisticky významný rozdiel. Výsledky testu poskytnú konkrétne páry, medzi ktorými existuje významný rozdiel v závislosti od hodnoty p.
#4)
# data4 <- data1_clean
#
# #a
# cor_matrix <- cor(data4)
# ozone_cor <- cor_matrix["Ozone",]
# ozone_cor
# strngst_pos <- names(which.max(ozone_cor[ozone_cor<1]))
# strngst_neg <- names(which.min(ozone_cor))
# strngst_pos
# strngst_neg
#
# #b
# spearman_cor <- cor(data4,method = "spearman")
# kendall_cor <- cor(data4,method = "kendall")
#
# spearman_cor["Ozone",]
# kendall_cor["Ozone",]
#
# #c
# plot(data4$Ozone, data4[[strngst_pos]], main = paste("Ozone vs", strngst_pos))
# plot(data4$Ozone, data4[[strngst_neg]], main = paste("Ozone vs", strngst_neg))
#5)
# data5 <- datasets::ChickWeight
#
# #a
# linear_model <- lm(weight ~ Time + Diet, data = data5)
# summary(linear_model)$adj.r.squared
# summary(linear_model)
#
# #b
# full_model <- lm(weight ~ . , data = data5)
# summary(full_model)$adj.r.squared
# summary(full_model)
#
# #c
# AIC(linear_model, full_model)
# BIC(linear_model, full_model)
#
# #d
# best_model <- if (AIC(linear_model) < AIC(full_model)) {
# linear_model
# } else {
# full_model
# }
#
# library(randtests)
# residuals_test <- residuals(best_model)
# shapiro.test(residuals_test)
# runs.test(residuals_test)