# Load the data from Nepremicnine.csv. The file is read as semicolon-delimited
# with a decimal comma, and its first row is used for the column names.
podatki <- read.table(
  file = "./Nepremicnine.csv",
  sep = ";",
  dec = ",",
  header = TRUE
)
# Show the first six rows as a quick check of the import.
head(podatki, n = 6L)
## ID Cena
## 1 1 3144
## 2 2 3512
## 3 3 3555
## 4 4 3322
## 5 5 2980
## 6 6 3732
Opis spremenljivk:
# Sample mean of the Cena column.
mean(podatki[["Cena"]])
## [1] 3109.677
# Sample standard deviation of the Cena column.
sd(podatki[["Cena"]])
## [1] 476.681
library(ggplot2)
# Density curve of the t distribution with 30 degrees of freedom, drawn over
# the x range from -4 to 4. Title and both axis labels are set in one labs()
# call.
ggplot(data = NULL, mapping = aes(x = c(-4, 4))) +
  geom_line(stat = "function", fun = dt, args = list(df = 30)) +
  labs(
    title = "Porazdelitev vzorčnih ocen",
    x = "Vzorčne ocene",
    y = "Gostota"
  )
# Quantiles of the t distribution with 30 degrees of freedom that cut off a
# probability of 0.025 in the lower tail (first call, lower.tail defaults to
# TRUE) and in the upper tail (second call).
qt(0.025, df = 30)
## [1] -2.042272
qt(0.025, df = 30, lower.tail = FALSE)
## [1] 2.042272
# One-sample, two-sided t test of the null hypothesis that the mean of Cena
# equals 2770. The argument is written as podatki$Cena on purpose: the printed
# "data:" label is taken from this expression.
t.test(x = podatki$Cena, alternative = "two.sided", mu = 2770)
##
## One Sample t-test
##
## data: podatki$Cena
## t = 3.9675, df = 30, p-value = 0.0004175
## alternative hypothesis: true mean is not equal to 2770
## 95 percent confidence interval:
## 2934.829 3284.525
## sample estimates:
## mean of x
## 3109.677
Velikost učinka (Effect size)
library(effectsize)
# Cohen's d for the one-sample comparison of Cena against the reference value
# 2770. The result is stored so that the interpretation below uses the
# estimate itself rather than a rounded number copied by hand from the output.
d_cena <- effectsize::cohens_d(podatki$Cena, mu = 2770)
d_cena
## Cohen's d | 95% CI
## ------------------------
## 0.71 | [0.31, 1.10]
##
## - Deviation from a difference of 2770.
# Label the estimated effect size using the sawilowsky2009 rule set. Passing
# the unrounded Cohens_d column keeps the label in sync with the data and
# avoids a rounding error flipping the category near a threshold.
effectsize::interpret_cohens_d(d_cena$Cohens_d, rules = "sawilowsky2009")
## [1] "medium"
## (Rules: sawilowsky2009)
Srednje velik učinek razlik. Pomaga ovrednotiti odkritje še malo bolj vsebinsko.