Department of Industrial Psychology
Stellenbosch University
South Africa
The data should be stored in a .csv file. Each column in the .csv file should have a heading.
### Selecting items for inclusion
library(psych)
mydata <- read.csv(file.choose())
## Find covariance matrix of the items
mydata <- na.omit(mydata)
mycov <- cov(mydata)
mycov
## B1 B2 B3 B4 B5 B6 B7
## B1 0.9235153 0.2186944 0.2536520 0.2802872 0.3429451 0.2056722 0.1033142
## B2 0.2186944 1.4442406 0.6508017 0.2920939 0.5472685 0.1664814 0.1115201
## B3 0.2536520 0.6508017 1.4139903 0.3670950 0.3948899 0.2862541 0.3215655
## B4 0.2802872 0.2920939 0.3670950 1.1215012 0.4876270 0.4494896 0.3509955
## B5 0.3429451 0.5472685 0.3948899 0.4876270 1.6579023 0.3847637 0.1868892
## B6 0.2056722 0.1664814 0.2862541 0.4494896 0.3847637 0.9615633 0.4349359
## B7 0.1033142 0.1115201 0.3215655 0.3509955 0.1868892 0.4349359 1.1407325
## B8 0.1943371 0.1419764 0.1650470 0.2271335 0.2802716 0.2355921 0.1640946
## B9 0.1552046 0.0911395 0.2113437 0.2034604 0.2342354 0.2401712 0.2278643
## B10 0.1582794 0.1694033 0.2198268 0.2880240 0.3628943 0.2734807 0.2413918
## B11 0.2321908 0.2253998 0.2797585 0.3041935 0.4441719 0.2987398 0.1854821
## B12 0.2829888 0.2283256 0.3732705 0.4334717 0.5353736 0.3334875 0.2313200
## B8 B9 B10 B11 B12
## B1 0.1943371 0.1552046 0.1582794 0.2321908 0.2829888
## B2 0.1419764 0.0911395 0.1694033 0.2253998 0.2283256
## B3 0.1650470 0.2113437 0.2198268 0.2797585 0.3732705
## B4 0.2271335 0.2034604 0.2880240 0.3041935 0.4334717
## B5 0.2802716 0.2342354 0.3628943 0.4441719 0.5353736
## B6 0.2355921 0.2401712 0.2734807 0.2987398 0.3334875
## B7 0.1640946 0.2278643 0.2413918 0.1854821 0.2313200
## B8 0.9113250 0.3313690 0.3434232 0.3114432 0.2531661
## B9 0.3313690 0.7538775 0.3205626 0.3437303 0.2798363
## B10 0.3434232 0.3205626 1.0044729 0.3017912 0.2221255
## B11 0.3114432 0.3437303 0.3017912 1.0034052 0.6544816
## B12 0.2531661 0.2798363 0.2221255 0.6544816 1.3644362
## Step 1
## Find the highest covariance in the matrix
max(abs(mycov[upper.tri(mycov)]))
## [1] 0.6544816
## Find the names of the variables involved in the highest covariance
core1 <- which(mycov == max(abs(mycov[upper.tri(mycov)])), arr.ind = T)
core1.names <- row.names(core1)
core1.names
## [1] "B12" "B11"
## Calculate the total score of two items with highest covariance (Total1)
total1 <- rowSums(mydata[, row.names(core1)])
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
##Step 2
## Find the covariances of the remaining items with total1
cov(total1, mydata[, !items])
## B1 B2 B3 B4 B5 B6 B7
## [1,] 0.5151796 0.4537254 0.653029 0.7376652 0.9795455 0.6322273 0.4168021
## B8 B9 B10
## [1,] 0.5646093 0.6235666 0.5239167
## Find the highest item-total covariance
max(abs(cov(total1, mydata[, !items])))
## [1] 0.9795455
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[, !items])
add.item <- v_name[which.max(cov(total1, mydata[, !items]))]
add.item
## [1] "B5"
## Find new total score (Total2)
total2 <- total1 + mydata[, add.item]
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5"
suppressWarnings(reliability(mydata[core1.names], nfactors = 1))
## Measures of reliability
## reliability(keys = mydata[core1.names], nfactors = 1)
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.7 0.68 0.7 0.95 0.95 1 0.66 0.65 0.42
## med.r n.items
## All_items 0.36 3
suppressMessages(suppressWarnings(rel <- reliability(mydata[core1.names], nfactors = 1)))
myrel1 <- c(rel$result.df[1], rel$result.df[2], rel$result.df[3],
rel$result[8], rel$result.df[9], rel$result.df[4])
rel <- myrel1
rel
## [1] 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## Step3
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])
## B1 B2 B3 B4 B6 B7 B8 B9
## [1,] 0.8581247 1.000994 1.047919 1.225292 1.016991 0.6036913 0.8448809 0.857802
## B10
## [1,] 0.886811
## Find the highest item-total covariance
max(abs(cov(total2, mydata[, !items])))
## [1] 1.225292
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[, !items])
add.item <- v_name[which.max(cov(total2, mydata[, !items]))]
add.item
## [1] "B4"
## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5" "B4"
## Find new total score (total2)
total2 <- total2 + mydata[, add.item]
suppressWarnings(reliability(mydata[core1.names], nfactors = 1))
## Measures of reliability
## reliability(keys = mydata[core1.names], nfactors = 1)
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.71 0.71 0.71 0.93 0.95 0.98 0.76 0.63 0.38
## med.r n.items
## All_items 0.35 4
suppressMessages(suppressWarnings(rel2 <- reliability(mydata[core1.names], nfactors = 1)))
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3],
rel2$result[8], rel2$result.df[9], rel2$result.df[4])
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel
## omega_h alpha omega_t beta mean.r uni
## rel 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
# Items in scale
items <- names(mydata) %in% core1.names
## Step 4
## Find the covariances of the remaining items with total2
cov(total2, mydata[!items])
## B1 B2 B3 B6 B7 B8 B9 B10
## [1,] 1.138412 1.293088 1.415014 1.466481 0.9546868 1.072014 1.061262 1.174835
## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))
## [1] 1.466481
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[, !items])
add.item <- v_name[which.max(cov(total2, mydata[!items]))]
add.item
## [1] "B6"
## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5" "B4" "B6"
## Find new total score (total2)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])
## Measures of reliability
## reliability(keys = mydata[core1.names])
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.56 0.74 0.78 0.92 0.95 0.96 0.76 0.64 0.36
## med.r n.items
## All_items 0.35 5
rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3],
rel2$result[8], rel2$result.df[9], rel2$result.df[4])
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel
## omega_h alpha omega_t beta mean.r uni
## rel 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
## Step 5
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])
## B1 B2 B3 B7 B8 B9 B10
## B6 1.344084 1.459569 1.701268 1.389623 1.307606 1.301434 1.448316
## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))
## [1] 1.701268
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[, !items]))]
add.item
## [1] "B3"
## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5" "B4" "B6" "B3"
## Find new total score (total5)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])
## Measures of reliability
## reliability(keys = mydata[core1.names])
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.58 0.74 0.78 0.91 0.94 0.97 0.79 0.67 0.33
## med.r n.items
## All_items 0.3 6
rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3],
rel2$result[8], rel2$result.df[9], rel2$result.df[4])
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel
## omega_h alpha omega_t beta mean.r uni
## rel 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
## myrel2 0.7454908 0.7434860 0.7459778 0.6736918 0.3257232 0.9138514
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
## Step 6
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])
## B1 B2 B7 B8 B9 B10
## B6 1.597736 2.110371 1.711188 1.472653 1.512777 1.668143
## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))
## [1] 2.110371
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[, !items]))]
add.item
## [1] "B2"
## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5" "B4" "B6" "B3" "B2"
## Find new total score (total5)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])
## Measures of reliability
## reliability(keys = mydata[core1.names])
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.38 0.75 0.8 0.84 0.91 0.93 0.82 0.67 0.31
## med.r n.items
## All_items 0.29 7
rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3],
rel2$result[8], rel2$result.df[9], rel2$result.df[4])
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel
## omega_h alpha omega_t beta mean.r uni
## rel 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
## myrel2 0.7454908 0.7434860 0.7459778 0.6736918 0.3257232 0.9138514
## myrel2 0.7554543 0.7548580 0.7563636 0.6687127 0.3055051 0.8442300
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
## Step 7
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])
## B1 B7 B8 B9 B10
## B6 1.81643 1.822708 1.61463 1.603917 1.837546
## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))
## [1] 1.837546
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[!items]))]
add.item
## [1] "B10"
## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5" "B4" "B6" "B3" "B2" "B10"
## Find new total score (total5)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])
## Measures of reliability
## reliability(keys = mydata[core1.names])
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.37 0.76 0.81 0.85 0.91 0.93 0.84 0.69 0.29
## med.r n.items
## All_items 0.28 8
rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3],
rel2$result[8], rel2$result.df[9], rel2$result.df[4])
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel
## omega_h alpha omega_t beta mean.r uni
## rel 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
## myrel2 0.7454908 0.7434860 0.7459778 0.6736918 0.3257232 0.9138514
## myrel2 0.7554543 0.7548580 0.7563636 0.6687127 0.3055051 0.8442300
## myrel2 0.7652682 0.7638599 0.7658639 0.6904294 0.2879252 0.8455026
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE
## Step 8
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])
## B1 B7 B8 B9
## B6 1.97471 2.0641 1.958053 1.924479
## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))
## [1] 2.0641
## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[!items]))]
add.item
## [1] "B7"
## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names
## [1] "B12" "B11" "B5" "B4" "B6" "B3" "B2" "B10" "B7"
## Find new total score (total2)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])
## Measures of reliability
## reliability(keys = mydata[core1.names])
## omega_h alpha omega.tot Uni r.fit fa.fit max.split min.split mean.r
## All_items 0.36 0.77 0.81 0.82 0.89 0.92 0.84 0.69 0.27
## med.r n.items
## All_items 0.27 9
rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3],
rel2$result[8], rel2$result.df[9], rel2$result.df[4])
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
round(rel, 3)
## omega_h alpha omega_t beta mean.r uni
## rel 0.699 0.685 0.699 0.655 0.420 0.948
## myrel2 0.711 0.707 0.713 0.629 0.376 0.933
## myrel2 0.737 0.737 0.738 0.635 0.359 0.920
## myrel2 0.745 0.743 0.746 0.674 0.326 0.914
## myrel2 0.755 0.755 0.756 0.669 0.306 0.844
## myrel2 0.765 0.764 0.766 0.690 0.288 0.846
## myrel2 0.774 0.772 0.774 0.691 0.274 0.817
# Items in scale
items <- names(mydata) %in% core1.names
items
## [1] FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE
plot(rel[, 6], ylab = "Unidimensionality")
plot(rel[, 2], ylab = "Alpha")
plot(rel[, 3], ylab = "Omega")
plot(rel[, 1], ylab = "Omega hierarchical")
plot(rel[, 4], ylab = "Beta")
plot(rel[, 5], ylab = "Mean inter-item correlation")
scree(mydata)
fa(mydata[items])
## Factor Analysis using method = minres
## Call: fa(r = mydata[items])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## B2 0.41 0.17 0.83 1
## B3 0.50 0.25 0.75 1
## B4 0.62 0.38 0.62 1
## B5 0.58 0.33 0.67 1
## B6 0.58 0.34 0.66 1
## B7 0.42 0.18 0.82 1
## B10 0.44 0.19 0.81 1
## B11 0.59 0.34 0.66 1
## B12 0.59 0.35 0.65 1
##
## MR1
## SS loadings 2.52
## Proportion Var 0.28
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 36 with the objective function = 1.93 with Chi Square = 1687.78
## df of the model are 27 and the objective function was 0.45
##
## The root mean square of the residuals (RMSR) is 0.08
## The df corrected root mean square of the residuals is 0.09
##
## The harmonic n.obs is 879 with the empirical chi square 420.04 with prob < 4.1e-72
## The total n.obs was 879 with Likelihood Chi Square = 396.2 with prob < 3e-67
##
## Tucker Lewis Index of factoring reliability = 0.702
## RMSEA index = 0.125 and the 90 % confidence intervals are 0.114 0.136
## BIC = 213.17
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.89
## Multiple R square of scores with factors 0.79
## Minimum correlation of possible factor scores 0.57
fa(mydata[items])