Item selection via item-total covariances

Read the data and perform the analysis

The data should be stored in a .csv file. Each column in the .csv file should have a heading.

### Selecting items for inclusion 

library(psych)
mydata <- read.csv(file.choose())
## Find covariance matrix of the items
mydata <- na.omit(mydata)
mycov <- cov(mydata)
mycov

##            B1        B2        B3        B4        B5        B6        B7
## B1  0.9235153 0.2186944 0.2536520 0.2802872 0.3429451 0.2056722 0.1033142
## B2  0.2186944 1.4442406 0.6508017 0.2920939 0.5472685 0.1664814 0.1115201
## B3  0.2536520 0.6508017 1.4139903 0.3670950 0.3948899 0.2862541 0.3215655
## B4  0.2802872 0.2920939 0.3670950 1.1215012 0.4876270 0.4494896 0.3509955
## B5  0.3429451 0.5472685 0.3948899 0.4876270 1.6579023 0.3847637 0.1868892
## B6  0.2056722 0.1664814 0.2862541 0.4494896 0.3847637 0.9615633 0.4349359
## B7  0.1033142 0.1115201 0.3215655 0.3509955 0.1868892 0.4349359 1.1407325
## B8  0.1943371 0.1419764 0.1650470 0.2271335 0.2802716 0.2355921 0.1640946
## B9  0.1552046 0.0911395 0.2113437 0.2034604 0.2342354 0.2401712 0.2278643
## B10 0.1582794 0.1694033 0.2198268 0.2880240 0.3628943 0.2734807 0.2413918
## B11 0.2321908 0.2253998 0.2797585 0.3041935 0.4441719 0.2987398 0.1854821
## B12 0.2829888 0.2283256 0.3732705 0.4334717 0.5353736 0.3334875 0.2313200
##            B8        B9       B10       B11       B12
## B1  0.1943371 0.1552046 0.1582794 0.2321908 0.2829888
## B2  0.1419764 0.0911395 0.1694033 0.2253998 0.2283256
## B3  0.1650470 0.2113437 0.2198268 0.2797585 0.3732705
## B4  0.2271335 0.2034604 0.2880240 0.3041935 0.4334717
## B5  0.2802716 0.2342354 0.3628943 0.4441719 0.5353736
## B6  0.2355921 0.2401712 0.2734807 0.2987398 0.3334875
## B7  0.1640946 0.2278643 0.2413918 0.1854821 0.2313200
## B8  0.9113250 0.3313690 0.3434232 0.3114432 0.2531661
## B9  0.3313690 0.7538775 0.3205626 0.3437303 0.2798363
## B10 0.3434232 0.3205626 1.0044729 0.3017912 0.2221255
## B11 0.3114432 0.3437303 0.3017912 1.0034052 0.6544816
## B12 0.2531661 0.2798363 0.2221255 0.6544816 1.3644362

## Step 1
## Find the highest covariance in the matrix
max(abs(mycov[upper.tri(mycov)]))

## [1] 0.6544816

## Find the names of the variables involved in the highest covariance
core1 <- which(mycov == max(abs(mycov[upper.tri(mycov)])), arr.ind = T)
core1.names <- row.names(core1)
core1.names

## [1] "B12" "B11"

## Calculate the total score of two items with highest covariance (Total1)
total1 <- rowSums(mydata[, row.names(core1)])

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE

##Step 2
## Find the covariances of the remaining items with total1
cov(total1, mydata[, !items])

##             B1        B2       B3        B4        B5        B6        B7
## [1,] 0.5151796 0.4537254 0.653029 0.7376652 0.9795455 0.6322273 0.4168021
##             B8        B9       B10
## [1,] 0.5646093 0.6235666 0.5239167

## Find the highest item-total covariance
max(abs(cov(total1, mydata[, !items])))

## [1] 0.9795455

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[, !items])
add.item <- v_name[which.max(cov(total1, mydata[, !items]))]
add.item

## [1] "B5"

## Find new total score (Total2)
total2 <- total1 + mydata[, add.item]

core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"

suppressWarnings(reliability(mydata[core1.names], nfactors = 1))

## Measures of reliability 
## reliability(keys = mydata[core1.names], nfactors = 1)
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items     0.7  0.68       0.7 0.95  0.95      1      0.66      0.65   0.42
##           med.r n.items
## All_items  0.36       3

suppressMessages(suppressWarnings(rel <- reliability(mydata[core1.names], nfactors = 1)))
myrel1 <- c(rel$result.df[1], rel$result.df[2], rel$result.df[3], 
           rel$result[8], rel$result.df[9], rel$result.df[4])  
rel <- myrel1
rel

## [1] 0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE

## Step3
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])

##             B1       B2       B3       B4       B6        B7        B8       B9
## [1,] 0.8581247 1.000994 1.047919 1.225292 1.016991 0.6036913 0.8448809 0.857802
##           B10
## [1,] 0.886811

## Find the highest item-total covariance
max(abs(cov(total2, mydata[, !items])))

## [1] 1.225292

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[, !items])
add.item <- v_name[which.max(cov(total2, mydata[, !items]))]
add.item

## [1] "B4"

## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"  "B4"

## Find new total score (total2)
total2 <- total2 + mydata[, add.item]
suppressWarnings(reliability(mydata[core1.names], nfactors = 1))

## Measures of reliability 
## reliability(keys = mydata[core1.names], nfactors = 1)
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items    0.71  0.71      0.71 0.93  0.95   0.98      0.76      0.63   0.38
##           med.r n.items
## All_items  0.35       4

suppressMessages(suppressWarnings(rel2 <- reliability(mydata[core1.names], nfactors = 1)))
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3], 
           rel2$result[8], rel2$result.df[9], rel2$result.df[4])  
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel

##          omega_h     alpha   omega_t      beta    mean.r       uni
## rel    0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721

# Items in scale
items <- names(mydata) %in% core1.names



## Step 4
## Find the covariances of the remaining items with total2
cov(total2, mydata[!items])

##            B1       B2       B3       B6        B7       B8       B9      B10
## [1,] 1.138412 1.293088 1.415014 1.466481 0.9546868 1.072014 1.061262 1.174835

## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))

## [1] 1.466481

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[, !items])
add.item <- v_name[which.max(cov(total2, mydata[!items]))]
add.item

## [1] "B6"

## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"  "B4"  "B6"

## Find new total score (total2)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])

## Measures of reliability 
## reliability(keys = mydata[core1.names])
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items    0.56  0.74      0.78 0.92  0.95   0.96      0.76      0.64   0.36
##           med.r n.items
## All_items  0.35       5

rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3], 
            rel2$result[8], rel2$result.df[9], rel2$result.df[4])  
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel

##          omega_h     alpha   omega_t      beta    mean.r       uni
## rel    0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE

## Step 5
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])

##          B1       B2       B3       B7       B8       B9      B10
## B6 1.344084 1.459569 1.701268 1.389623 1.307606 1.301434 1.448316

## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))

## [1] 1.701268

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[, !items]))]
add.item

## [1] "B3"

## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"  "B4"  "B6"  "B3"

## Find new total score (total5)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])

## Measures of reliability 
## reliability(keys = mydata[core1.names])
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items    0.58  0.74      0.78 0.91  0.94   0.97      0.79      0.67   0.33
##           med.r n.items
## All_items   0.3       6

rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3], 
            rel2$result[8], rel2$result.df[9], rel2$result.df[4])  
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel

##          omega_h     alpha   omega_t      beta    mean.r       uni
## rel    0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
## myrel2 0.7454908 0.7434860 0.7459778 0.6736918 0.3257232 0.9138514

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE

## Step 6
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])

##          B1       B2       B7       B8       B9      B10
## B6 1.597736 2.110371 1.711188 1.472653 1.512777 1.668143

## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))

## [1] 2.110371

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[, !items]))]
add.item

## [1] "B2"

## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"  "B4"  "B6"  "B3"  "B2"

## Find new total score (total5)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])

## Measures of reliability 
## reliability(keys = mydata[core1.names])
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items    0.38  0.75       0.8 0.84  0.91   0.93      0.82      0.67   0.31
##           med.r n.items
## All_items  0.29       7

rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3], 
            rel2$result[8], rel2$result.df[9], rel2$result.df[4])  
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel

##          omega_h     alpha   omega_t      beta    mean.r       uni
## rel    0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
## myrel2 0.7454908 0.7434860 0.7459778 0.6736918 0.3257232 0.9138514
## myrel2 0.7554543 0.7548580 0.7563636 0.6687127 0.3055051 0.8442300

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE

## Step 7
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])

##         B1       B7      B8       B9      B10
## B6 1.81643 1.822708 1.61463 1.603917 1.837546

## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))

## [1] 1.837546

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[!items]))]
add.item

## [1] "B10"

## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"  "B4"  "B6"  "B3"  "B2"  "B10"

## Find new total score (total5)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])

## Measures of reliability 
## reliability(keys = mydata[core1.names])
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items    0.37  0.76      0.81 0.85  0.91   0.93      0.84      0.69   0.29
##           med.r n.items
## All_items  0.28       8

rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3], 
            rel2$result[8], rel2$result.df[9], rel2$result.df[4])  
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
rel

##          omega_h     alpha   omega_t      beta    mean.r       uni
## rel    0.6989632 0.6846895 0.6989630 0.6549481 0.4198950 0.9476182
## myrel2 0.7111309 0.7065410 0.7127888 0.6287549 0.3757443 0.9331721
## myrel2 0.7373941 0.7366386 0.7380708 0.6350210 0.3587329 0.9202435
## myrel2 0.7454908 0.7434860 0.7459778 0.6736918 0.3257232 0.9138514
## myrel2 0.7554543 0.7548580 0.7563636 0.6687127 0.3055051 0.8442300
## myrel2 0.7652682 0.7638599 0.7658639 0.6904294 0.2879252 0.8455026

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE

## Step 8
## Find the covariances of the remaining items with total2
cov(total2, mydata[, !items])

##         B1     B7       B8       B9
## B6 1.97471 2.0641 1.958053 1.924479

## Find the highest item-total covariance
max(abs(cov(total2, mydata[!items])))

## [1] 2.0641

## Find the name of the variable with the highest item-total covariance
v_name = colnames(mydata[!items])
add.item <- v_name[which.max(cov(total2, mydata[!items]))]
add.item

## [1] "B7"

## Find the names of the items in the scale
core1.names <- c(core1.names, add.item)
core1.names

## [1] "B12" "B11" "B5"  "B4"  "B6"  "B3"  "B2"  "B10" "B7"

## Find new total score (total2)
total2 <- total2 + mydata[add.item]
reliability(mydata[core1.names])

## Measures of reliability 
## reliability(keys = mydata[core1.names])
##           omega_h alpha omega.tot  Uni r.fit fa.fit max.split min.split mean.r
## All_items    0.36  0.77      0.81 0.82  0.89   0.92      0.84      0.69   0.27
##           med.r n.items
## All_items  0.27       9

rel2 <- reliability(mydata[core1.names], nfactors = 1)
myrel2 <- c(rel2$result.df[1], rel2$result.df[2], rel2$result.df[3], 
            rel2$result[8], rel2$result.df[9], rel2$result.df[4])  
rel <- rbind(rel, myrel2)
#results <- rbind(myrel1, myrel2)
colnames(rel) <- c("omega_h", "alpha", "omega_t", "beta", "mean.r", "uni")
round(rel, 3)

##        omega_h alpha omega_t  beta mean.r   uni
## rel      0.699 0.685   0.699 0.655  0.420 0.948
## myrel2   0.711 0.707   0.713 0.629  0.376 0.933
## myrel2   0.737 0.737   0.738 0.635  0.359 0.920
## myrel2   0.745 0.743   0.746 0.674  0.326 0.914
## myrel2   0.755 0.755   0.756 0.669  0.306 0.844
## myrel2   0.765 0.764   0.766 0.690  0.288 0.846
## myrel2   0.774 0.772   0.774 0.691  0.274 0.817

# Items in scale
items <- names(mydata) %in% core1.names
items

##  [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE

plot(rel[, 6], ylab = "Unidimensionality")

plot(rel[, 2], ylab = "Alpha")

plot(rel[, 3], ylab = "Omega")

plot(rel[, 1], ylab = "Omega hierarchical")

plot(rel[, 4], ylab = "Beta")

plot(rel[, 5], ylab = "Mean inter-item correlation")

scree(mydata)

fa(mydata[items])

## Factor Analysis using method =  minres
## Call: fa(r = mydata[items])
## Standardized loadings (pattern matrix) based upon correlation matrix
##      MR1   h2   u2 com
## B2  0.41 0.17 0.83   1
## B3  0.50 0.25 0.75   1
## B4  0.62 0.38 0.62   1
## B5  0.58 0.33 0.67   1
## B6  0.58 0.34 0.66   1
## B7  0.42 0.18 0.82   1
## B10 0.44 0.19 0.81   1
## B11 0.59 0.34 0.66   1
## B12 0.59 0.35 0.65   1
## 
##                 MR1
## SS loadings    2.52
## Proportion Var 0.28
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  36  with the objective function =  1.93 with Chi Square =  1687.78
## df of  the model are 27  and the objective function was  0.45 
## 
## The root mean square of the residuals (RMSR) is  0.08 
## The df corrected root mean square of the residuals is  0.09 
## 
## The harmonic n.obs is  879 with the empirical chi square  420.04  with prob <  4.1e-72 
## The total n.obs was  879  with Likelihood Chi Square =  396.2  with prob <  3e-67 
## 
## Tucker Lewis Index of factoring reliability =  0.702
## RMSEA index =  0.125  and the 90 % confidence intervals are  0.114 0.136
## BIC =  213.17
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.89
## Multiple R square of scores with factors          0.79
## Minimum correlation of possible factor scores     0.57

fa(mydata[items])

Item selection via item-total covariances

Deon de Bruin

2023-06-07

Read the data and perform the analysis