Profesorica predava predmet Upravljana ekonomika slovenskim in angleškim študentom. Pripravila je isti kolokvij za oba predmeta z namenom preveriti, ali se uspeh študentov razlikuje. Naključno je izbrala 40 študentov z angleškega predmeta in 51 študentov s slovenskega predmeta ter analizirala njihove rezultate. Se uspeh razlikuje?
library(readxl)
podatki <- read_xlsx("./Kolokvij.xlsx")
podatki <- as.data.frame(podatki)
head(podatki, 3)
## ID Tocke Skupina
## 1 1 9.0 1
## 2 2 9.0 1
## 3 3 7.5 1
Opis:
podatki$SkupinaF <- factor(podatki$Skupina,
levels = c(1, 2),
labels = c("Ang", "Slo"))
head(podatki, 3)
## ID Tocke Skupina SkupinaF
## 1 1 9.0 1 Ang
## 2 2 9.0 1 Ang
## 3 3 7.5 1 Ang
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.4.2
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
podatki %>%
group_by(SkupinaF) %>%
shapiro_test(Tocke)
## # A tibble: 2 × 4
## SkupinaF variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 Ang Tocke 0.972 0.416
## 2 Slo Tocke 0.962 0.101
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:car':
##
## logit
describeBy(podatki$Tocke, podatki$SkupinaF)
##
## Descriptive statistics by group
## group: Ang
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 40 8.93 2.92 9 9 2.22 1.5 15 13.5 -0.26 -0.1
## se
## X1 0.46
## ----------------------------------------------------
## group: Slo
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 51 9.18 2.5 9 9.15 2.22 3 15 12 0.1 0.12
## se
## X1 0.35
t.test(podatki$Tocke ~ podatki$SkupinaF,
var.equal = FALSE,
alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: podatki$Tocke by podatki$SkupinaF
## t = -0.43368, df = 76.96, p-value = 0.6657
## alternative hypothesis: true difference in means between group Ang and group Slo is not equal to 0
## 95 percent confidence interval:
## -1.4061053 0.9031641
## sample estimates:
## mean in group Ang mean in group Slo
## 8.925000 9.176471
library(effectsize)
##
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
##
## phi
## The following objects are masked from 'package:rstatix':
##
## cohens_d, eta_squared
effectsize::cohens_d(podatki$Tocke ~ podatki$SkupinaF,
pooled_sd = FALSE)
## Cohen's d | 95% CI
## -------------------------
## -0.09 | [-0.51, 0.33]
##
## - Estimated using un-pooled SD.
library(effectsize)
interpret_cohens_d(0.09)
## [1] "very small"
## (Rules: cohen1988)
wilcox.test(podatki$Tocke ~ podatki$SkupinaF,
correct = FALSE,
exact = FALSE,
alternative = "two.sided")
##
## Wilcoxon rank sum test
##
## data: podatki$Tocke by podatki$SkupinaF
## W = 984.5, p-value = 0.7725
## alternative hypothesis: true location shift is not equal to 0
library(ggplot2)
ggplot(podatki, aes(x = Tocke, fill = SkupinaF)) +
geom_histogram(position = position_dodge(width = 0.5), binwidth = 1, colour = "Black") +
scale_x_continuous(breaks = seq(0, 20, 1)) +
ylab("Frekvenca") +
labs(fill = "Skupina")
library(effectsize)
effectsize(wilcox.test(podatki$Tocke ~ podatki$SkupinaF,
correct = FALSE,
exact = FALSE,
alternative = "two.sided"))
## r (rank biserial) | 95% CI
## ---------------------------------
## -0.03 | [-0.27, 0.20]
zelo majhne razlike.
library(effectsize)
interpret_rank_biserial(0.03)
## [1] "tiny"
## (Rules: funder2019)