library(readxl)
ds <- read_excel("data partly.xlsx") # replace with the path to your copy of the file
ds[, 34] <- 6 - ds[, 34] # reverse-score the 5-point item in column 34 (ATT4)
colnames(ds) <- c("id",
"time_spent",
"gender",
"age_group",
"driving_experience_years",
"driving_experience_years_group",
"annual_mileage",
"monthly_salary",
"education_level",
"other_insurance",
"have_accident",
"accident_speed_highway",
"speed_fine_highway",
"speed_bellow_limit",
"ATT1", "SN1", "IN1",
"SN2", "BH1", "PBC1",
"ATT3", "RP1", "IN2",
"BH2", "ATT2", "RP2",
"SN3", "PBC2", "SN4",
"RP3", "attention_check",
"BH3", "PBC3", "ATT4",
"IN3", "BH4", "PBC4")
sub = ds[, c("ATT1","ATT2","ATT3", "ATT4")]
psych::omega(sub)
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.67
## G.6: 0.65
## Omega Hierarchical: 0.64
## Omega H asymptotic: 0.84
## Omega Total 0.76
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* h2 u2 p2
## ATT1 0.80 0.66 0.34 0.97
## ATT2 0.80 0.66 0.34 0.97
## ATT3 0.32 0.48 0.36 0.64 0.29
## ATT4 0.35 0.48 0.37 0.63 0.33
##
## With eigenvalues of:
## g F1* F2* F3*
## 1.51 0.01 0.47 0.06
##
## general/max 3.23 max/min = 77.32
## mean percent general = 0.64 with sd = 0.38 and cv of 0.6
## Explained Common Variance of the general factor = 0.74
##
## The degrees of freedom are -3 and the fit is 0
## The number of observations was 374 with Chi Square = 0 with prob < NA
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is NA
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 2 and the fit is 0.06
## The number of observations was 374 with Chi Square = 23.19 with prob < 9.2e-06
## The root mean square of the residuals is 0.09
## The df corrected root mean square of the residuals is 0.16
##
## RMSEA index = 0.168 and the 10 % confidence intervals are 0.111 0.233
## BIC = 11.34
##
## Measures of factor score adequacy
## g F1* F2* F3*
## Correlation of scores with factors 0.89 0.07 0.63 0.36
## Multiple R square of scores with factors 0.80 0.00 0.40 0.13
## Minimum correlation of factor score estimates 0.59 -0.99 -0.20 -0.75
##
## Total, General and Subset omega for each subset
## g F1* F2* F3*
## Omega total for total scores and subscales 0.76 NA 0.52 0.79
## Omega general for total scores and subscales 0.64 NA 0.17 0.79
## Omega group for total scores and subscales 0.12 NA 0.35 0.00
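The hierarchical omega printed above can be reproduced by hand: omega hierarchical is the squared sum of the general-factor (g) loadings divided by the total variance of the sum score. A quick check, reading the g loadings off the Schmid-Leiman table above:
# omega_h = (sum of g loadings)^2 / sum of all entries of the item correlation matrix
g <- c(0.80, 0.80, 0.32, 0.35)
sum(g)^2 / sum(cor(sub)) # approximately 0.64, matching "Omega Hierarchical" above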
psych::alpha(sub)
##
## Reliability analysis
## Call: psych::alpha(x = sub)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.66 0.67 0.65 0.34 2.1 0.029 2.4 0.8 0.29
##
## lower alpha upper 95% confidence boundaries
## 0.61 0.66 0.72
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## ATT1 0.55 0.55 0.45 0.29 1.2 0.040 0.0011 0.28
## ATT2 0.55 0.55 0.45 0.29 1.2 0.040 0.0018 0.30
## ATT3 0.66 0.66 0.61 0.40 2.0 0.031 0.0403 0.30
## ATT4 0.63 0.65 0.60 0.38 1.9 0.034 0.0452 0.28
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## ATT1 374 0.76 0.76 0.69 0.51 2.3 1.17
## ATT2 374 0.73 0.76 0.69 0.54 2.2 0.96
## ATT3 374 0.69 0.65 0.44 0.37 2.8 1.27
## ATT4 374 0.66 0.67 0.46 0.39 2.4 1.09
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## ATT1 0.32 0.30 0.21 0.13 0.05 0
## ATT2 0.27 0.37 0.27 0.09 0.01 0
## ATT3 0.17 0.29 0.25 0.16 0.13 0
## ATT4 0.19 0.44 0.21 0.11 0.06 0
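As a sanity check, the standardized alpha follows directly from the average inter-item correlation, \(\alpha_{std} = k\,\bar{r} / (1 + (k - 1)\,\bar{r})\); with k = 4 items and average_r = 0.34 this gives about 0.67:
R <- cor(sub)
rbar <- mean(R[lower.tri(R)]) # average inter-item correlation (average_r above)
k <- ncol(sub)
k * rbar / (1 + (k - 1) * rbar) # ~0.67, matching std.alpha above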
library(ggcorrplot)
correlation_matrix <- cor(sub)
ggcorrplot(correlation_matrix, type = "lower", lab = TRUE)
psych::polychoric(sub)
## Call: psych::polychoric(x = sub)
## Polychoric correlations
## ATT1 ATT2 ATT3 ATT4
## ATT1 1.00
## ATT2 0.70 1.00
## ATT3 0.28 0.32 1.00
## ATT4 0.35 0.32 0.37 1.00
##
## with tau of
## 1 2 3 4
## ATT1 -0.46 0.31 0.95 1.7
## ATT2 -0.60 0.36 1.32 2.4
## ATT3 -0.97 -0.11 0.53 1.1
## ATT4 -0.88 0.32 0.97 1.6
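Because the items are ordinal Likert responses, the same reliability analysis can also be run on the polychoric instead of the Pearson correlations; psych::omega exposes a poly argument for this (a sketch; the estimates will differ somewhat from the Pearson-based output above):
psych::omega(sub, poly = TRUE) # omega from the polychoric correlation matrix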
dfnum <- ds # keep a numeric copy of the items before converting them to labelled factors
#gender --- respondent gender
ds$gender <- factor(ds$gender,
levels = c(1,2),
labels = c("Female","Male"))
#age_group --- respondent age group
ds$age_group <- ordered(ds$age_group,
levels = c(1:5),
labels = c("18-30","31-40","41-50","51-60",">60"))
#driving_experience_years --- driving experience in years
#driving_experience_years_group --- driving experience (years, grouped)
ds$driving_experience_years_group <- ordered(ds$driving_experience_years_group,
levels = c(1:4),
labels = c("≤1",
"2–5",
"6–10",
"≥11"))
#annual_mileage --- Annual mileage (km)
ds$annual_mileage <- ordered(ds$annual_mileage, levels = c(1:5),
labels = c("<10,000",
"10,000–20,000",
"20,000–40,000",
"40,000–60,000",
">60,000"))
#monthly_salary --- Monthly income (renminbi, the currency of the PR China)
ds$monthly_salary <- ordered(ds$monthly_salary, levels = c(1:3),
labels = c("<5,000",
"5,000–10,000",
">10,000"))
#education_level --- respondent education level
ds$education_level <- ordered(ds$education_level, levels = c(1:3),
labels = c("Lower secondary or below",
"Secondary education",
"Tertiary education"))
#other_insurance --- Insurance other than mandatory insurance
ds$other_insurance <- factor(ds$other_insurance,
levels = c(1,2),
labels = c("Yes","No"))
#have_accident --- Accident experience
ds$have_accident <- factor(ds$have_accident,
levels = c(1,2),
labels = c("Yes","No"))
#accident_speed_highway --- Accident experience in the highway due to high speed
ds$accident_speed_highway <- factor(ds$accident_speed_highway,
levels = c(1,2),
labels = c("Yes","No"))
#speed_fine_highway --- Fine experience in the highway due to high speed
ds$speed_fine_highway <- factor(ds$speed_fine_highway,
levels = c(1,2),
labels = c("Yes","No"))
#speed_bellow_limit --- Low-speed driving frequency
ds$speed_bellow_limit <- ordered(ds$speed_bellow_limit,
levels = c(1:5),
labels = c("Never", "Rarely drive at low speed",
"Sometimes / occasionally drive at low speed",
"Often drive at low speed", "Always drive at low speed"))
# ds$ATT1 <- ordered(ds$ATT1, levels = c(1:5),
# labels = c("Definitely disapprove", "Disapprove", "Neutral", "Approve",
# "Definitely approve"))
# ds$ATT2 <- ordered(ds$ATT2, levels = c(1:5),
# labels = c("Definitely disapprove", "Disapprove", "Neutral", "Approve",
# "Definitely approve"))
# ds$ATT3 <- ordered(ds$ATT3, levels = c(1:5),
# labels = c("Definitely disapprove", "Disapprove", "Neutral", "Approve",
# "Definitely approve"))
# ds$ATT4 <- ordered(ds$ATT4, levels = c(1:5),
# labels = c("Definitely disapprove", "Disapprove", "Neutral", "Approve",
# "Definitely approve"))
att_items <- paste0("ATT",1:4)
sn_items <- paste0("SN",1:4)
pbc_items <- paste0("PBC",1:4)
in_items <- paste0("IN",1:3)
bh_items <- paste0("BH",1:4)
rp_items <- paste0("RP",1:3)
#make the vector c(att_items, sn_items, pbc_items, in_items, bh_items, rp_items)
#as ordered factors with levels from 1-5 with labels "Definitely disapprove"
#to "Definitely approve"
ds[,c(att_items, sn_items, pbc_items, in_items, bh_items, rp_items)] <- lapply(
ds[,c(att_items, sn_items, pbc_items, in_items, bh_items, rp_items)],ordered,
levels = 1:5,
labels = c("Definitely disapprove", "Disapprove", "Neutral", "Approve",
"Definitely approve"))
class(ds$ATT1)
## [1] "ordered" "factor"
The scaleStructure function (which was originally called scaleReliability) computes a number of measures to assess scale reliability and internal consistency. Note that computing omega requires the MBESS and/or psych packages; these are suggested packages and therefore have to be installed separately (i.e., they are not installed automatically).
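One way to pull in those suggested packages when they are missing (a minimal sketch using base R only):
for (pkg in c("MBESS", "psych")) { # suggested packages needed for omega
  if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
}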
Omega
The omega (ω) coefficient is another measure of internal-consistency reliability. ω estimates the general factor saturation of a test and was proposed by McDonald. Zinbarg et al. (2005) compared McDonald's ω to Cronbach's α and Revelle's β, and concluded that ω is the best of these estimates (Zinbarg et al. 2006).
A very handy way to calculate McDonald's ω is the scaleStructure() function (originally scaleReliability() in the userfriendlyscience package), which also reports Cronbach's α and the Greatest Lower Bound (GLB), itself a very good and innovative reliability estimate (see also Peters 2014).
ufs::scaleStructure(dat = ds,
items = c("ATT1","ATT2","ATT3", "ATT4"))
## Dataframe: | ds |
## Items: | ATT1, ATT2, ATT3 & ATT4 |
## Observations: | 374 |
## Positive correlations: | 6 |
## Number of correlations: | 6 |
## Percentage positive correlations: | 100 |
## Omega (total): | 0.76 |
## Omega (hierarchical): | 0.64 |
## Revelle’s Omega (total): | 0.76 |
## Greatest Lower Bound (GLB): | 0.76 |
## Coefficient H: | 0.78 |
## Coefficient Alpha: | 0.66 |
## (Estimates assuming ordinal level not computed, as the polychoric correlation matrix has missing values.)
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help (‘?ufs::scaleStructure’) for more information.
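The Dunn, Baguley and Brunsden style interval mentioned in the note can also be requested directly from MBESS (a sketch on the numeric item scores kept in dfnum; see ?MBESS::ci.reliability for the available interval types):
MBESS::ci.reliability(data = dfnum[, c("ATT1", "ATT2", "ATT3", "ATT4")], type = "omega")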
library(rio)
# https://rpubs.com/asghar_minaei/857065
dfcor <- dfnum[, c("ATT1","ATT2","ATT3", "ATT4")]
dfcor
## # A tibble: 374 × 4
## ATT1 ATT2 ATT3 ATT4
## <dbl> <dbl> <dbl> <dbl>
## 1 2 2 2 3
## 2 4 2 5 4
## 3 2 3 2 2
## 4 2 2 2 2
## 5 1 2 3 2
## 6 2 2 5 2
## 7 2 2 4 3
## 8 1 1 2 2
## 9 1 1 4 2
## 10 2 3 1 2
## # ℹ 364 more rows
mod.one.fCat <- 'psyctcsm =~ ATT1 + ATT2 + ATT3 + ATT4'
mod.one.fCat
## [1] "psyctcsm =~ ATT1 + ATT2 + ATT3 + ATT4"
library(lavaan)
fit.one.fCat <- cfa(mod.one.fCat, data = dfcor, std.lv = TRUE, ordered = TRUE, estimator = 'WLSMV')
summary(fit.one.fCat, fit.measures=TRUE, standardized=TRUE)
## lavaan 0.6.17 ended normally after 8 iterations
##
## Estimator DWLS
## Optimization method NLMINB
## Number of model parameters 20
##
## Number of observations 374
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 18.420 34.011
## Degrees of freedom 2 2
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 0.545
## Shift parameter 0.213
## simple second-order correction
##
## Model Test Baseline Model:
##
## Test statistic 668.168 557.974
## Degrees of freedom 6 6
## P-value 0.000 0.000
## Scaling correction factor 1.200
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.975 0.942
## Tucker-Lewis Index (TLI) 0.926 0.826
##
## Robust Comparative Fit Index (CFI) 0.923
## Robust Tucker-Lewis Index (TLI) 0.768
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.148 0.207
## 90 Percent confidence interval - lower 0.091 0.150
## 90 Percent confidence interval - upper 0.214 0.271
## P-value H_0: RMSEA <= 0.050 0.003 0.000
## P-value H_0: RMSEA >= 0.080 0.974 1.000
##
## Robust RMSEA 0.199
## 90 Percent confidence interval - lower 0.138
## 90 Percent confidence interval - upper 0.267
## P-value H_0: Robust RMSEA <= 0.050 0.000
## P-value H_0: Robust RMSEA >= 0.080 0.999
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.066 0.066
##
## Parameter Estimates:
##
## Parameterization Delta
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Unstructured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## psyctcsm =~
## ATT1 0.829 0.047 17.599 0.000 0.829 0.829
## ATT2 0.819 0.047 17.592 0.000 0.819 0.819
## ATT3 0.441 0.053 8.389 0.000 0.441 0.441
## ATT4 0.470 0.051 9.142 0.000 0.470 0.470
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ATT1|t1 -0.458 0.067 -6.790 0.000 -0.458 -0.458
## ATT1|t2 0.306 0.066 4.640 0.000 0.306 0.306
## ATT1|t3 0.950 0.077 12.377 0.000 0.950 0.950
## ATT1|t4 1.691 0.113 14.979 0.000 1.691 1.691
## ATT2|t1 -0.605 0.069 -8.712 0.000 -0.605 -0.605
## ATT2|t2 0.356 0.066 5.359 0.000 0.356 0.356
## ATT2|t3 1.319 0.090 14.620 0.000 1.319 1.319
## ATT2|t4 2.408 0.210 11.454 0.000 2.408 2.408
## ATT3|t1 -0.971 0.077 -12.556 0.000 -0.971 -0.971
## ATT3|t2 -0.114 0.065 -1.755 0.079 -0.114 -0.114
## ATT3|t3 0.534 0.068 7.806 0.000 0.534 0.534
## ATT3|t4 1.109 0.082 13.575 0.000 1.109 1.109
## ATT4|t1 -0.878 0.075 -11.734 0.000 -0.878 -0.878
## ATT4|t2 0.320 0.066 4.846 0.000 0.320 0.320
## ATT4|t3 0.971 0.077 12.556 0.000 0.971 0.971
## ATT4|t4 1.565 0.104 15.064 0.000 1.565 1.565
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ATT1 0.312 0.312 0.312
## .ATT2 0.329 0.329 0.329
## .ATT3 0.805 0.805 0.805
## .ATT4 0.779 0.779 0.779
## psyctcsm 1.000 1.000 1.000
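Rather than scanning the full summary, the headline indices can be extracted directly with lavaan's fitMeasures(); the .scaled variants correspond to the WLSMV-corrected test above:
fitMeasures(fit.one.fCat, c("chisq.scaled", "df.scaled", "cfi.scaled",
                            "tli.scaled", "rmsea.scaled", "srmr"))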
#install.packages("splithalf")
library(splithalfr)
# Calculate coefficient based on ICC, two-way, random effects, absolute agreement, single rater
spearman_brown(dfcor[, "ATT1"], dfcor[, "ATT2"])
## ATT2
## ATT1 0.771888
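The same value can be reproduced by hand, assuming spearman_brown() defaults to the Pearson correlation between the two score vectors:
r12 <- cor(dfcor$ATT1, dfcor$ATT2)
2 * r12 / (1 + r12) # Spearman-Brown step-up of a two-part split, ~0.77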
library(psychometric)
Details
Nlength represents the ratio of new test length to original test length. If the new test has 10 items and the original test has 5 items, Nlength is 2. Likewise, if the new test has 5 items and the original test has 10, Nlength is .5. In general, researchers should aim for reliabilities > .9.
SBrel addresses the question: if I increased or decreased my test length, what would the new reliability be? It is used when computing split-half reliabilities and when considering shortening a test. SBlength addresses the converse question: how long must the new test be, relative to the original, to achieve a desired reliability? The formulae are:
\[ r_{xx'}^{*} = \frac{N \, r_{xx'}}{1 + (N - 1)\, r_{xx'}} \]
\[ N = \frac{r_{xx'}^{*}\,(1 - r_{xx'})}{r_{xx'}\,(1 - r_{xx'}^{*})} \]
where \(r_{xx'}\) is the original reliability, \(r_{xx'}^{*}\) is the target (prophesied) reliability, and \(N\) is Nlength, i.e. the length of the new test divided by the length of the original test, e.g. (10 / 5) = 2.
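A hand-rolled version of both formulas, purely for verification (same form as above):
sb_rel <- function(N, rxx) N * rxx / (1 + (N - 1) * rxx) # prophesied reliability
sb_len <- function(rxxp, rxx) rxxp * (1 - rxx) / (rxx * (1 - rxxp)) # required length ratio
sb_rel(2, .77) # doubling a test with rxx = .77 gives ~0.87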
#SBrel(Nlength, rxx)
SBrel(374, .77)
## [1] 0.999202
#SBlength(rxxp, rxx)
nrow(dfcor)
## [1] 374
SBlength(.85, .77)*nrow(dfcor)
## [1] 633.0476
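The last figure can be verified with the sb_len helper sketched earlier:
sb_len(.85, .77) * nrow(dfcor) # ~633, matching SBlength(.85, .77) * nrow(dfcor) above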