KR20 Estimate

library(dplyr)
library(tidyverse)
# Load the item-response table; the results printed below show
# 20 rows (students) and 25 columns (items sit1 ... sit25).
data <- read.csv(file = "/cloud/project/Reliability Assignment.csv")

# n: number of test items (one column of `data` per item)
n <- ncol(data)
print(n)
## [1] 25
# m: number of students (one row of `data` per student)
m <- nrow(data)
print(m)
## [1] 20

Proportion of the sample who answered each item correctly (p)

# p: per-item proportion correct, i.e. the mean of each item column.
# colMeans() is the vectorised equivalent of apply(data, 2, mean) and
# keeps the item names on the result.
p <- colMeans(data)
p
##  sit1  sit2  sit3  sit4  sit5  sit6  sit7  sit8  sit9 sit10 sit11 sit12 sit13 
##  0.95  0.70  0.95  0.85  0.85  0.75  0.80  0.65  0.60  0.80  0.70  0.50  0.70 
## sit14 sit15 sit16 sit17 sit18 sit19 sit20 sit21 sit22 sit23 sit24 sit25 
##  0.40  0.60  0.70  0.45  0.45  0.50  0.45  0.45  0.45  0.30  0.40  0.60

Proportion of the sample who answered each item incorrectly (q)

# q: per-item proportion incorrect, the complement of p
q <- 1 - p
print(q)
##  sit1  sit2  sit3  sit4  sit5  sit6  sit7  sit8  sit9 sit10 sit11 sit12 sit13 
##  0.05  0.30  0.05  0.15  0.15  0.25  0.20  0.35  0.40  0.20  0.30  0.50  0.30 
## sit14 sit15 sit16 sit17 sit18 sit19 sit20 sit21 sit22 sit23 sit24 sit25 
##  0.60  0.40  0.30  0.55  0.55  0.50  0.55  0.55  0.55  0.70  0.60  0.40

Item variances: the product pq for each item

# Despite its name, sum_pq holds the element-wise product p * q for each
# item; the products are added up in a later step.
sum_pq <- p * q
print(sum_pq)
##   sit1   sit2   sit3   sit4   sit5   sit6   sit7   sit8   sit9  sit10  sit11 
## 0.0475 0.2100 0.0475 0.1275 0.1275 0.1875 0.1600 0.2275 0.2400 0.1600 0.2100 
##  sit12  sit13  sit14  sit15  sit16  sit17  sit18  sit19  sit20  sit21  sit22 
## 0.2500 0.2100 0.2400 0.2400 0.2100 0.2475 0.2475 0.2500 0.2475 0.2475 0.2475 
##  sit23  sit24  sit25 
## 0.2100 0.2400 0.2400

Grand sum of pq: \(\sum pq\)

# Add the per-item pq products into a single total.
Grand_sum <- sum(sum_pq)
print(Grand_sum)
## [1] 5.0725

Total score by test taker

# Each student's total score is the sum of that student's row of item scores.
Total_scores <- rowSums(data)
# Display labels "Student 1" ... "Student m", used as row names below.
test_takers <- paste("Student", 1:m)
as.data.frame(Total_scores, row.names = test_takers)
##            Total_scores
## Student 1            11
## Student 2            13
## Student 3            21
## Student 4             9
## Student 5            13
## Student 6            12
## Student 7            25
## Student 8            23
## Student 9            25
## Student 10           10
## Student 11           17
## Student 12           11
## Student 13           12
## Student 14           14
## Student 15           18
## Student 16           20
## Student 17            7
## Student 18           22
## Student 19            7
## Student 20           21

Population variance of the students' total scores

# var() divides by (m - 1); multiplying by (m - 1) / m rescales the result
# to the population variance (divisor m).
Variance <- var(Total_scores)
Pop_var <- ((m - 1) / m) * Variance
print(Pop_var)
## [1] 33.2475

KR20

# KR-20: (n / (n - 1)) * (1 - sum(pq) / variance of total scores),
# using the grand sum of pq and the population variance computed above.
KR20 <- (n / (n - 1)) * (1 - Grand_sum / Pop_var)
print(KR20)
## [1] 0.8827418

Split half: Odd-Even Items

Odd items

# Odd-numbered items: the logical mask c(TRUE, FALSE) is recycled across
# the columns, so columns 1, 3, 5, ... are kept.
# drop = FALSE keeps the result a data frame even when only one column is
# selected, so rowSums() on it still works for very short tests.
item_odd <- data[, c(TRUE, FALSE), drop = FALSE]
head(item_odd)
##   sit1 sit3 sit5 sit7 sit9 sit11 sit13 sit15 sit17 sit19 sit21 sit23 sit25
## 1    1    1    0    0    1     0     1     1     0     0     1     0     1
## 2    1    1    1    1    1     1     1     0     0     0     0     0     0
## 3    1    1    1    1    0     1     1     1     1     1     1     0     1
## 4    1    1    1    0    0     1     0     0     0     1     0     0     0
## 5    1    1    1    1    1     1     1     0     0     0     0     0     0
## 6    1    1    1    1    0     1     0     0     0     1     0     0     0

Even items

# Even-numbered items: the logical mask c(FALSE, TRUE) is recycled across
# the columns, so columns 2, 4, 6, ... are kept.
# drop = FALSE keeps the result a data frame even when only one column is
# selected, so rowSums() on it still works for very short tests.
item_even <- data[, c(FALSE, TRUE), drop = FALSE]
head(item_even)
##   sit2 sit4 sit6 sit8 sit10 sit12 sit14 sit16 sit18 sit20 sit22 sit24
## 1    0    1    0    0     1     1     0     0     1     0     0     0
## 2    1    1    1    0     1     1     1     0     0     0     0     0
## 3    1    1    1    0     1     1     1     1     1     1     0     1
## 4    0    1    0    0     1     0     0     1     0     1     0     0
## 5    1    1    1    1     1     1     0     0     0     0     0     0
## 6    1    1    1    0     0     0     0     1     1     0     0     1

Total score by test takers by test subset

# Per-student totals on the odd-item and even-item halves, shown side by
# side with "Student k" row labels.
test_takers <- paste("Student", 1:m)
Total_Odd <- rowSums(item_odd)
Total_Even <- rowSums(item_even)
Split_Total <- data.frame(Total_Odd, Total_Even)
as.data.frame(Split_Total, row.names = test_takers)
##            Total_Odd Total_Even
## Student 1          7          4
## Student 2          7          6
## Student 3         11         10
## Student 4          5          4
## Student 5          7          6
## Student 6          6          6
## Student 7         13         12
## Student 8         13         10
## Student 9         13         12
## Student 10         5          5
## Student 11         9          8
## Student 12         5          6
## Student 13         8          4
## Student 14         9          5
## Student 15        10          8
## Student 16        12          8
## Student 17         2          5
## Student 18        11         11
## Student 19         4          3
## Student 20        12          9

Correlation for the two subsets

# Correlation between the odd-half and even-half totals (cor() default method).
cor_odd_even <- cor(x = Total_Odd, y = Total_Even)
print(cor_odd_even)
## [1] 0.8459988

Split half: First and Second Half Items

First half items

# First half of the items: columns 1 .. ceiling(n / 2). With an odd n the
# extra item goes to this half.
# seq_len() avoids the backwards 1:0 sequence when there are no columns, and
# drop = FALSE keeps a data frame even when the half is a single column.
item_first <- data[, seq_len(ceiling(n / 2)), drop = FALSE]
head(item_first)
##   sit1 sit2 sit3 sit4 sit5 sit6 sit7 sit8 sit9 sit10 sit11 sit12 sit13
## 1    1    0    1    1    0    0    0    0    1     1     0     1     1
## 2    1    1    1    1    1    1    1    0    1     1     1     1     1
## 3    1    1    1    1    1    1    1    0    0     1     1     1     1
## 4    1    0    1    1    1    0    0    0    0     1     1     0     0
## 5    1    1    1    1    1    1    1    1    1     1     1     1     1
## 6    1    1    1    1    1    1    1    0    0     0     1     0     0

Second half items

# Second half of the items: every column after the first ceiling(n / 2).
# setdiff() on seq_len() yields an empty selection (instead of a backwards
# sequence such as 2:1) when there is nothing left for this half, and
# drop = FALSE keeps a data frame even when the half is a single column.
item_second <- data[, setdiff(seq_len(n), seq_len(ceiling(n / 2))), drop = FALSE]
head(item_second)
##   sit14 sit15 sit16 sit17 sit18 sit19 sit20 sit21 sit22 sit23 sit24 sit25
## 1     0     1     0     0     1     0     0     1     0     0     0     1
## 2     1     0     0     0     0     0     0     0     0     0     0     0
## 3     1     1     1     1     1     1     1     1     0     0     1     1
## 4     0     0     1     0     0     1     1     0     0     0     0     0
## 5     0     0     0     0     0     0     0     0     0     0     0     0
## 6     0     0     1     0     1     1     0     0     0     0     1     0

Total score by test takers by test subset

# Per-student totals on the first-half and second-half items, shown side
# by side with "Student k" row labels.
test_takers <- paste("Student", 1:m)
Total_First <- rowSums(item_first)
Total_Second <- rowSums(item_second)
Split_Total2 <- data.frame(Total_First, Total_Second)
as.data.frame(Split_Total2, row.names = test_takers)
##            Total_First Total_Second
## Student 1            7            4
## Student 2           12            1
## Student 3           11           10
## Student 4            6            3
## Student 5           13            0
## Student 6            8            4
## Student 7           13           12
## Student 8           12           11
## Student 9           13           12
## Student 10           7            3
## Student 11          11            6
## Student 12           8            3
## Student 13           9            3
## Student 14          10            4
## Student 15          11            7
## Student 16          12            8
## Student 17           3            4
## Student 18          13            9
## Student 19           7            0
## Student 20          10           11

Correlation for the two subsets

# Correlation between the first-half and second-half totals (cor() default method).
cor_first_second <- cor(x = Total_First, y = Total_Second)
print(cor_first_second)
## [1] 0.5026067

Spearman-Brown adjustment of split-half estimates

Odd-Even Split

# Spearman-Brown step-up of the odd-even half-test correlation: 2r / (1 + r).
half <- (2 * cor_odd_even) / (1 + cor_odd_even)
print(half)
## [1] 0.9165757

The Spearman-Brown adjusted reliability estimate for the odd-even split is 0.92.

First and second halves split

# Spearman-Brown step-up of the first/second half-test correlation:
# 2r / (1 + r). Note: this overwrites `half` from the odd-even split.
half <- (2 * cor_first_second) / (1 + cor_first_second)
print(half)
## [1] 0.6689797

The Spearman-Brown adjusted reliability estimate for the first-half/second-half split is 0.67.