Twenty two wheeler users were surveyed about the perceptions and image attributes of the vehicles they owned. Ten questions were asked to each of them, all answered on a scale of 1 to 7 ( 1= completely agree, 7 = completely disagree)
Reading Data
raqData <- read.csv("TwoWheeler.csv",header = TRUE)
Calculate the correlation matrix
raqmatrix <- round(cor(raqData),2)
raqmatrix
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## X1 1.00 -0.18 0.55 0.07 0.17 0.17 0.20 0.31 0.26 0.48
## X2 -0.18 1.00 -0.55 -0.09 -0.33 0.06 -0.21 -0.32 -0.25 0.02
## X3 0.55 -0.55 1.00 -0.14 0.00 -0.18 -0.05 0.44 0.41 0.32
## X4 0.07 -0.09 -0.14 1.00 0.90 0.94 0.91 -0.28 -0.02 0.09
## X5 0.17 -0.33 0.00 0.90 1.00 0.85 0.96 -0.14 0.06 0.12
## X6 0.17 0.06 -0.18 0.94 0.85 1.00 0.89 -0.33 -0.05 0.07
## X7 0.20 -0.21 -0.05 0.91 0.96 0.89 1.00 -0.22 0.00 0.17
## X8 0.31 -0.32 0.44 -0.28 -0.14 -0.33 -0.22 1.00 0.81 0.06
## X9 0.26 -0.25 0.41 -0.02 0.06 -0.05 0.00 0.81 1.00 -0.10
## X10 0.48 0.02 0.32 0.09 0.12 0.07 0.17 0.06 -0.10 1.00
Run Barlette Test using psych package
library(psych)
## Warning: package 'psych' was built under R version 3.6.1
cortest.bartlett(raqData)
## R was not square, finding R from data
## $chisq
## [1] 162.0597
##
## $p.value
## [1] 3.975173e-15
##
## $df
## [1] 45
The test data is highly significant and therefore factor analysis is appropriate
KMO Test
# KMO Kaiser-Meyer-Olkin Measure of Sampling Adequacy
# Function by G. Jay Kerns, Ph.D., Youngstown State University (http://tolstoy.newcastle.edu.au/R/e2/help/07/08/22816.html)
kmo = function( data ){
library(MASS)
X <- cor(as.matrix(data))
iX <- ginv(X)
S2 <- diag(diag((iX^-1)))
AIS <- S2%*%iX%*%S2 # anti-image covariance matrix
IS <- X+AIS-2*S2 # image covariance matrix
Dai <- sqrt(diag(diag(AIS)))
IR <- ginv(Dai)%*%IS%*%ginv(Dai) # image correlation matrix
AIR <- ginv(Dai)%*%AIS%*%ginv(Dai) # anti-image correlation matrix
a <- apply((AIR - diag(diag(AIR)))^2, 2, sum)
AA <- sum(a)
b <- apply((X - diag(nrow(X)))^2, 2, sum)
BB <- sum(b)
MSA <- b/(b+a) # indiv. measures of sampling adequacy
AIR <- AIR-diag(nrow(AIR))+diag(MSA)
# Examine the anti-image of the correlation matrix. That is the negative of the partial correlations, partialling out all other variables.
kmo <- BB/(AA+BB) # overall KMO statistic
# Reporting the conclusion
if (kmo >= 0.00 && kmo < 0.50){test <- 'The KMO test yields a degree of common variance unacceptable for FA.'}
else if (kmo >= 0.50 && kmo < 0.60){test <- 'The KMO test yields a degree of common variance miserable.'}
else if (kmo >= 0.60 && kmo < 0.70){test <- 'The KMO test yields a degree of common variance mediocre.'}
else if (kmo >= 0.70 && kmo < 0.80){test <- 'The KMO test yields a degree of common variance middling.' }
else if (kmo >= 0.80 && kmo < 0.90){test <- 'The KMO test yields a degree of common variance meritorious.' }
else { test <- 'The KMO test yields a degree of common variance marvelous.' }
ans <- list( overall = kmo,
report = test,
individual = MSA,
AIS = AIS,
AIR = AIR )
return(ans)
}
#To use this function:
kmo(raqData)
## $overall
## [1] 0.632722
##
## $report
## [1] "The KMO test yields a degree of common variance mediocre."
##
## $individual
## X1 X2 X3 X4 X5 X6 X7
## 0.4963513 0.3664454 0.5994228 0.7566516 0.7504714 0.6672545 0.8213176
## X8 X9 X10
## 0.5723553 0.4794491 0.3234788
##
## $AIS
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.38412488 0.034994642 -0.132056861 0.0659954137 0.024030673
## [2,] 0.03499464 0.265510720 0.158465165 0.0262535151 0.060310309
## [3,] -0.13205686 0.158465165 0.340631354 -0.0026323743 0.014841005
## [4,] 0.06599541 0.026253515 -0.002632374 0.0587369613 -0.001697965
## [5,] 0.02403067 0.060310309 0.014841005 -0.0016979654 0.053252161
## [6,] -0.06901473 -0.060179169 -0.001605786 -0.0392011556 -0.014605196
## [7,] -0.02762426 -0.007386612 0.009569241 -0.0122913404 -0.038725206
## [8,] -0.07053457 0.036293283 0.060811832 -0.0082664045 -0.011680505
## [9,] 0.02689327 -0.056888486 -0.103019828 -0.0001919385 -0.002027330
## [10,] -0.15013082 -0.163129871 -0.145696556 -0.0364325616 -0.019800238
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.069014729 -0.027624262 -0.070534568 0.0268932735 -0.15013082
## [2,] -0.060179169 -0.007386612 0.036293283 -0.0568884860 -0.16312987
## [3,] -0.001605786 0.009569241 0.060811832 -0.1030198276 -0.14569656
## [4,] -0.039201156 -0.012291340 -0.008266404 -0.0001919385 -0.03643256
## [5,] -0.014605196 -0.038725206 -0.011680505 -0.0020273299 -0.01980024
## [6,] 0.050202583 -0.001108273 0.022263338 -0.0091699911 0.05264103
## [7,] -0.001108273 0.060479727 0.015959726 -0.0064485731 -0.01978761
## [8,] 0.022263338 0.015959726 0.199816915 -0.1743651690 -0.07511575
## [9,] -0.009169991 -0.006448573 -0.174365169 0.2221877025 0.12982179
## [10,] 0.052641026 -0.019787610 -0.075115747 0.1298217926 0.50030101
##
## $AIR
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.49635131 0.10957824 -0.36507529 0.439361301 0.16802005
## [2,] 0.10957824 0.36644544 0.52692702 0.210228064 0.50720310
## [3,] -0.36507529 0.52692702 0.59942281 -0.018610138 0.11019259
## [4,] 0.43936130 0.21022806 -0.01861014 0.756651631 -0.03036019
## [5,] 0.16802005 0.50720310 0.11019259 -0.030360190 0.75047140
## [6,] -0.49698415 -0.52124527 -0.01227955 -0.721905088 -0.28247222
## [7,] -0.18123812 -0.05829071 0.06666997 -0.206223711 -0.68237045
## [8,] -0.25459488 0.15756839 0.23309323 -0.076303558 -0.11323401
## [9,] 0.09205501 -0.23421965 -0.37447127 -0.001680143 -0.01863785
## [10,] -0.34246639 -0.44758629 -0.35293220 -0.212529040 -0.12130707
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.49698415 -0.18123812 -0.25459488 0.092055013 -0.3424664
## [2,] -0.52124527 -0.05829071 0.15756839 -0.234219655 -0.4475863
## [3,] -0.01227955 0.06666997 0.23309323 -0.374471266 -0.3529322
## [4,] -0.72190509 -0.20622371 -0.07630356 -0.001680143 -0.2125290
## [5,] -0.28247222 -0.68237045 -0.11323401 -0.018637849 -0.1213071
## [6,] 0.66725450 -0.02011309 0.22228550 -0.086825216 0.3321587
## [7,] -0.02011309 0.82131762 0.14517920 -0.055628674 -0.1137556
## [8,] 0.22228550 0.14517920 0.57235529 -0.827529722 -0.2375741
## [9,] -0.08682522 -0.05562867 -0.82752972 0.479449123 0.3893784
## [10,] 0.33215869 -0.11375559 -0.23757414 0.389378441 0.3234788
Overall KMO and individual KMO is more than 0.5 hence we can use it for the analysis
Lets see the determinant of correlation matrix.
det(raqmatrix)
## [1] 1.604297e-05
It is more than 0.00001 Now Extracting Factors. First set number of factors equal to that of variables.
pc1 <- principal(raqData,nfactors=10,rotate="none")
pc1
## Principal Components Analysis
## Call: principal(r = raqData, nfactors = 10, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 h2 u2
## X1 0.17 0.67 0.49 0.19 -0.42 -0.25 -0.02 -0.02 -0.01 0.02 1 3.3e-16
## X2 -0.13 -0.61 0.25 0.70 -0.09 0.18 0.02 0.11 0.00 0.03 1 5.6e-16
## X3 -0.11 0.82 0.21 -0.29 -0.16 0.38 0.06 0.07 0.00 0.00 1 2.2e-16
## X4 0.97 -0.02 -0.11 0.07 0.04 0.09 0.12 -0.12 -0.08 0.08 1 1.6e-15
## X5 0.95 0.17 -0.13 -0.09 0.09 -0.06 -0.05 0.11 0.11 0.07 1 1.2e-15
## X6 0.95 -0.07 -0.03 0.19 -0.13 0.07 0.08 -0.08 0.08 -0.10 1 0.0e+00
## X7 0.97 0.10 -0.04 -0.01 0.05 -0.03 -0.08 0.15 -0.11 -0.06 1 1.4e-15
## X8 -0.33 0.77 -0.31 0.31 0.21 -0.16 0.19 0.07 0.00 -0.01 1 4.4e-16
## X9 -0.09 0.72 -0.49 0.42 0.03 0.13 -0.18 -0.08 0.00 0.00 1 7.8e-16
## X10 0.16 0.32 0.81 0.11 0.44 0.03 -0.04 -0.05 0.01 -0.01 1 3.3e-16
## com
## X1 3.3
## X2 2.6
## X3 2.0
## X4 1.1
## X5 1.2
## X6 1.2
## X7 1.1
## X8 2.6
## X9 2.8
## X10 2.1
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 3.89 2.76 1.38 0.95 0.48 0.30 0.10 0.09 0.04 0.03
## Proportion Var 0.39 0.28 0.14 0.09 0.05 0.03 0.01 0.01 0.00 0.00
## Cumulative Var 0.39 0.66 0.80 0.90 0.94 0.97 0.99 0.99 1.00 1.00
## Proportion Explained 0.39 0.28 0.14 0.09 0.05 0.03 0.01 0.01 0.00 0.00
## Cumulative Proportion 0.39 0.66 0.80 0.90 0.94 0.97 0.99 0.99 1.00 1.00
##
## Mean item complexity = 2
## Test of the hypothesis that 10 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
SS loading or eigen values suggests that only first four components are sufficient. Also fit based on off diagonal values is greater than 0.96 Scree plot
plot(pc1$values,type="b")
Rerunning the analysis with 4 factors
pc2 <- principal(raqData,nfactors=length(raqData),rotate="none")
pc2
## Principal Components Analysis
## Call: principal(r = raqData, nfactors = length(raqData), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 h2 u2
## X1 0.17 0.67 0.49 0.19 -0.42 -0.25 -0.02 -0.02 -0.01 0.02 1 3.3e-16
## X2 -0.13 -0.61 0.25 0.70 -0.09 0.18 0.02 0.11 0.00 0.03 1 5.6e-16
## X3 -0.11 0.82 0.21 -0.29 -0.16 0.38 0.06 0.07 0.00 0.00 1 2.2e-16
## X4 0.97 -0.02 -0.11 0.07 0.04 0.09 0.12 -0.12 -0.08 0.08 1 1.6e-15
## X5 0.95 0.17 -0.13 -0.09 0.09 -0.06 -0.05 0.11 0.11 0.07 1 1.2e-15
## X6 0.95 -0.07 -0.03 0.19 -0.13 0.07 0.08 -0.08 0.08 -0.10 1 0.0e+00
## X7 0.97 0.10 -0.04 -0.01 0.05 -0.03 -0.08 0.15 -0.11 -0.06 1 1.4e-15
## X8 -0.33 0.77 -0.31 0.31 0.21 -0.16 0.19 0.07 0.00 -0.01 1 4.4e-16
## X9 -0.09 0.72 -0.49 0.42 0.03 0.13 -0.18 -0.08 0.00 0.00 1 7.8e-16
## X10 0.16 0.32 0.81 0.11 0.44 0.03 -0.04 -0.05 0.01 -0.01 1 3.3e-16
## com
## X1 3.3
## X2 2.6
## X3 2.0
## X4 1.1
## X5 1.2
## X6 1.2
## X7 1.1
## X8 2.6
## X9 2.8
## X10 2.1
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 3.89 2.76 1.38 0.95 0.48 0.30 0.10 0.09 0.04 0.03
## Proportion Var 0.39 0.28 0.14 0.09 0.05 0.03 0.01 0.01 0.00 0.00
## Cumulative Var 0.39 0.66 0.80 0.90 0.94 0.97 0.99 0.99 1.00 1.00
## Proportion Explained 0.39 0.28 0.14 0.09 0.05 0.03 0.01 0.01 0.00 0.00
## Cumulative Proportion 0.39 0.66 0.80 0.90 0.94 0.97 0.99 0.99 1.00 1.00
##
## Mean item complexity = 2
## Test of the hypothesis that 10 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
Here PC1, PC2 and PC3 are giving the eigen value >1. Lets see the scree plot:
plot(pc1$values, type = "b")
As scree plot gave only 2 factors lets rerun with 3 factors and see
pc2 <- principal(raqData, nfactors = 3, rotate = "none")
pc2
## Principal Components Analysis
## Call: principal(r = raqData, nfactors = 3, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## X1 0.17 0.67 0.49 0.72 0.278 2.0
## X2 -0.13 -0.61 0.25 0.45 0.546 1.4
## X3 -0.11 0.82 0.21 0.74 0.265 1.2
## X4 0.97 -0.02 -0.11 0.95 0.055 1.0
## X5 0.95 0.17 -0.13 0.95 0.051 1.1
## X6 0.95 -0.07 -0.03 0.91 0.088 1.0
## X7 0.97 0.10 -0.04 0.95 0.046 1.0
## X8 -0.33 0.77 -0.31 0.79 0.206 1.7
## X9 -0.09 0.72 -0.49 0.77 0.233 1.8
## X10 0.16 0.32 0.81 0.79 0.213 1.4
##
## PC1 PC2 PC3
## SS loadings 3.89 2.76 1.38
## Proportion Var 0.39 0.28 0.14
## Cumulative Var 0.39 0.66 0.80
## Proportion Explained 0.48 0.34 0.17
## Cumulative Proportion 0.48 0.83 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 12.67 with prob < 0.81
##
## Fit based upon off diagonal values = 0.96
Lets see if the factors are correct, we find the corr matrix from pc2
loadings <- factor.model(pc2$loadings)
communality <- diag(loadings)
communality
## X1 X2 X3 X4 X5 X6 X7
## 0.7220787 0.4535572 0.7353256 0.9450383 0.9487024 0.9123104 0.9540182
## X8 X9 X10
## 0.7938690 0.7671693 0.7865355
The diagonals of this matrix contains the communalities after extraction. Lets see the difference it is called residual
residuals <- factor.residuals(raqmatrix,pc2$loadings)
uniqueness <- diag(residuals)
uniqueness
## X1 X2 X3 X4 X5 X6
## 0.27792134 0.54644278 0.26467441 0.05496171 0.05129763 0.08768960
## X7 X8 X9 X10
## 0.04598180 0.20613101 0.23283072 0.21346448
The diagonal of this matrix is the uniqueness.
residuals<-as.matrix(residuals[upper.tri(residuals)])
This command re-creates the object residuals by using only the upper triangle of the original matrix. We now have an object called residuals that contains the residuals stored in a column. This is handy because it makes it easy to calculate various things.
large.resid<-abs(residuals) > 0.05
# proportion of the large residuals
sum(large.resid)/nrow(residuals)
## [1] 0.3555556
Some other residuals stats, such as the mean, are skipped here.
We can set rotate=“varimax” in the principal() function. But there are too many things to see.
print.psych() command prints the factor loading matrix associated with the model pc3, but displaying only loadings above .3 (cut = 0.3) and sorting items by the size of their loadings (sort = TRUE).
pc3 <- principal(raqData, nfactors=3, rotate="varimax")
pc3
## Principal Components Analysis
## Call: principal(r = raqData, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## X1 0.13 0.31 0.78 0.72 0.278 1.4
## X2 -0.18 -0.64 -0.11 0.45 0.546 1.2
## X3 -0.11 0.61 0.59 0.74 0.265 2.1
## X4 0.97 -0.06 -0.01 0.95 0.055 1.0
## X5 0.96 0.12 0.07 0.95 0.051 1.0
## X6 0.94 -0.14 0.03 0.91 0.088 1.0
## X7 0.97 0.01 0.11 0.95 0.046 1.0
## X8 -0.26 0.85 0.10 0.79 0.206 1.2
## X9 0.00 0.87 -0.06 0.77 0.233 1.0
## X10 0.06 -0.15 0.87 0.79 0.213 1.1
##
## RC1 RC2 RC3
## SS loadings 3.84 2.41 1.77
## Proportion Var 0.38 0.24 0.18
## Cumulative Var 0.38 0.63 0.80
## Proportion Explained 0.48 0.30 0.22
## Cumulative Proportion 0.48 0.78 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 12.67 with prob < 0.81
##
## Fit based upon off diagonal values = 0.96
print.psych(pc3, cut = 0.3)
## Principal Components Analysis
## Call: principal(r = raqData, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## X1 0.31 0.78 0.72 0.278 1.4
## X2 -0.64 0.45 0.546 1.2
## X3 0.61 0.59 0.74 0.265 2.1
## X4 0.97 0.95 0.055 1.0
## X5 0.96 0.95 0.051 1.0
## X6 0.94 0.91 0.088 1.0
## X7 0.97 0.95 0.046 1.0
## X8 0.85 0.79 0.206 1.2
## X9 0.87 0.77 0.233 1.0
## X10 0.87 0.79 0.213 1.1
##
## RC1 RC2 RC3
## SS loadings 3.84 2.41 1.77
## Proportion Var 0.38 0.24 0.18
## Cumulative Var 0.38 0.63 0.80
## Proportion Explained 0.48 0.30 0.22
## Cumulative Proportion 0.48 0.78 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 12.67 with prob < 0.81
##
## Fit based upon off diagonal values = 0.96
There is no problem interpreting factor 1 as a combination of ‘a man’s vehicle’(X4), ‘feeling of power’(X5), ‘others are jealous of me’ (X6) and ‘feel good when I see my two-wheeler ads’(X7). The suitable phrase can be “Male Ego” or “Pride of Ownership”
For factor 2 there X3, X8 and X9, the statments are “low maintenance”, “comfort” and “safety”, we call them “utility”
for Factor 3 it is X1 and X10 “affordabiilty” and “cost saving”. We can call it “economy”
Variable 2 has a communality (h2) of only 0.52, which means that only 52% of the variance of variable 2 is captured by our extracted factors. It is possible that variable 2 is an independent variable which is not combining well with any other variable and therefor should be further investigated separately. “Freedom” could be a different concept in the mind of our target audience. Label it
By setting scores=TRUE:
head(pc3$scores) # access scores by pc5$scores
## RC1 RC2 RC3
## [1,] 1.68530395 -0.3948390 -1.1094773
## [2,] 0.23779462 0.9906883 -0.7793819
## [3,] -0.99374629 1.7696806 -0.8914126
## [4,] -0.55854987 0.3747305 1.5086186
## [5,] 0.92701421 -0.6054475 -0.6140184
## [6,] 0.06020236 1.4057439 0.1583126
raqData1 <- cbind(raqData[,1:10], pc3$scores)
# bind the factor scores to raqData dataframe for other use
head(raqData1)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 RC1 RC2 RC3
## 1 1 4 1 6 5 6 5 2 3 2 1.68530395 -0.3948390 -1.1094773
## 2 2 3 2 4 3 3 3 5 5 2 0.23779462 0.9906883 -0.7793819
## 3 2 2 2 1 2 1 1 7 6 2 -0.99374629 1.7696806 -0.8914126
## 4 5 1 4 2 2 2 2 3 2 3 -0.55854987 0.3747305 1.5086186
## 5 1 2 2 5 4 4 4 1 1 2 0.92701421 -0.6054475 -0.6140184
## 6 3 2 3 3 3 3 3 6 5 3 0.06020236 1.4057439 0.1583126
biplot.psych(pc2)