Import data
library(corpcor)
library(GPArotation)
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(reshape2)
library(rio)
# Read the raw data set from CSV into a data frame.
# NOTE(review): the path is absolute and user-specific, so the script only
# runs unchanged on the original author's machine.
FullData <- read.csv(file = "/Users/Lorraine/Desktop/Project3EFA.csv")
Bartlett’s Test of Sphericity and the KMO index
# Keep only complete cases (rows without any NA) and correlate every pair of
# columns in what remains.
Nomiss.FullData <- na.omit(object = FullData)
efacormatrix <- cor(x = Nomiss.FullData)
# Display the correlation matrix rounded to two decimal places.
round(x = efacormatrix, digits = 2)
## X100.m Long.Jump Shotput High.Jump X400.m X110.m Discus
## X100.m 1.00 0.59 0.35 0.34 0.63 0.40 0.28
## Long.Jump 0.59 1.00 0.42 0.51 0.49 0.52 0.31
## Shotput 0.35 0.42 1.00 0.38 0.19 0.36 0.73
## High.Jump 0.34 0.51 0.38 1.00 0.29 0.46 0.27
## X400.m 0.63 0.49 0.19 0.29 1.00 0.34 0.17
## X110.m 0.40 0.52 0.36 0.46 0.34 1.00 0.32
## Discus 0.28 0.31 0.73 0.27 0.17 0.32 1.00
## Pole.vault 0.20 0.36 0.24 0.39 0.23 0.33 0.24
## Javelin 0.11 0.21 0.44 0.17 0.13 0.18 0.34
## X1500.m -0.07 0.09 -0.08 0.18 0.39 0.00 -0.02
## Pole.vault Javelin X1500.m
## X100.m 0.20 0.11 -0.07
## Long.Jump 0.36 0.21 0.09
## Shotput 0.24 0.44 -0.08
## High.Jump 0.39 0.17 0.18
## X400.m 0.23 0.13 0.39
## X110.m 0.33 0.18 0.00
## Discus 0.24 0.34 -0.02
## Pole.vault 1.00 0.24 0.17
## Javelin 0.24 1.00 0.00
## X1500.m 0.17 0.00 1.00
# Bartlett's test of sphericity on the correlation matrix. The sample size is
# taken from the complete-case data that produced `efacormatrix` instead of
# being hard-coded, so the test stays correct if the input file changes.
cortest.bartlett(efacormatrix, n = nrow(Nomiss.FullData))
## $chisq
## [1] 905.875
##
## $p.value
## [1] 1.097871e-160
##
## $df
## [1] 45
# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per variable).
KMO(r = efacormatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = efacormatrix)
## Overall MSA = 0.73
## MSA for each item =
## X100.m Long.Jump Shotput High.Jump X400.m X110.m
## 0.66 0.87 0.71 0.83 0.61 0.88
## Discus Pole.vault Javelin X1500.m
## 0.71 0.86 0.79 0.29
Principal Components Analysis
# Unrotated principal components analysis on the raw data, extracting as many
# components (10) as there are variables.
# NOTE(review): this call uses FullData, whereas the correlation matrix was
# built from the NA-omitted copy; confirm the two agree if FullData has
# missing values.
pc1 <- principal(r = FullData, nfactors = 10, rotate = "none")
print(pc1)
## Principal Components Analysis
## Call: principal(r = FullData, nfactors = 10, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 h2
## X100.m 0.69 0.22 -0.52 0.21 0.13 -0.15 -0.13 0.15 0.04 0.29 1
## Long.Jump 0.79 0.18 -0.19 -0.09 0.06 0.07 -0.13 -0.52 0.06 -0.05 1
## Shotput 0.70 -0.53 0.05 0.18 -0.22 -0.03 -0.08 -0.03 -0.36 0.01 1
## High.Jump 0.67 0.13 0.14 -0.40 -0.23 0.29 -0.39 0.24 0.06 -0.05 1
## X400.m 0.62 0.55 -0.08 0.42 0.11 -0.06 0.08 0.16 -0.05 -0.29 1
## X110.m 0.69 0.04 -0.16 -0.34 -0.05 0.25 0.56 0.06 -0.03 0.05 1
## Discus 0.62 -0.52 0.11 0.23 -0.36 -0.23 0.11 0.02 0.29 -0.04 1
## Pole.vault 0.54 0.09 0.41 -0.44 0.27 -0.51 0.01 0.03 -0.04 0.00 1
## Javelin 0.43 -0.44 0.37 0.23 0.57 0.31 -0.02 0.03 0.07 0.03 1
## X1500.m 0.15 0.60 0.66 0.28 -0.24 0.08 0.07 -0.09 -0.02 0.18 1
## u2 com
## X100.m 1.1e-16 3.2
## Long.Jump -8.9e-16 2.2
## Shotput 7.8e-16 2.9
## High.Jump 1.2e-15 3.7
## X400.m -6.7e-16 3.6
## X110.m 3.3e-16 3.0
## Discus 1.0e-15 4.0
## Pole.vault 1.4e-15 4.5
## Javelin 1.7e-15 4.8
## X1500.m 8.9e-16 3.1
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 3.79 1.52 1.11 0.91 0.72 0.59 0.53 0.38 0.24 0.21
## Proportion Var 0.38 0.15 0.11 0.09 0.07 0.06 0.05 0.04 0.02 0.02
## Cumulative Var 0.38 0.53 0.64 0.73 0.81 0.86 0.92 0.96 0.98 1.00
## Proportion Explained 0.38 0.15 0.11 0.09 0.07 0.06 0.05 0.04 0.02 0.02
## Cumulative Proportion 0.38 0.53 0.64 0.73 0.81 0.86 0.92 0.96 0.98 1.00
##
## Mean item complexity = 3.5
## Test of the hypothesis that 10 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
# Three-component solution with an orthogonal (varimax) rotation, computed
# from the correlation matrix.
pc2 <- principal(efacormatrix, nfactors = 3, rotate = "varimax")
# Print through the generic so S3 dispatch selects the psych method; calling
# print.psych() directly only works when the package exports that method.
# Loadings below .3 are hidden and items are sorted by loading.
print(pc2, cut = .3, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = efacormatrix, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item RC1 RC2 RC3 h2 u2 com
## X100.m 1 0.88 0.79 0.21 1.1
## Long.Jump 2 0.78 0.69 0.31 1.3
## X400.m 5 0.74 0.38 0.69 0.31 1.5
## X110.m 6 0.63 0.32 0.50 0.50 1.5
## High.Jump 4 0.51 0.35 0.34 0.49 0.51 2.6
## Shotput 3 0.31 0.82 0.78 0.22 1.3
## Discus 7 0.79 0.67 0.33 1.2
## Javelin 9 0.70 0.52 0.48 1.1
## X1500.m 10 0.89 0.81 0.19 1.0
## Pole.vault 8 0.40 0.50 0.47 0.53 2.4
##
## RC1 RC2 RC3
## SS loadings 2.78 2.26 1.38
## Proportion Var 0.28 0.23 0.14
## Cumulative Var 0.28 0.50 0.64
## Proportion Explained 0.43 0.35 0.21
## Cumulative Proportion 0.43 0.79 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.09
##
## Fit based upon off diagonal values = 0.92
Factor Analysis
# Unrotated six-factor solution fitted with factanal() (maximum-likelihood
# factor analysis from the stats package).
# NOTE(review): Nomiss.FullData already has incomplete rows removed, and
# factanal() documents na.action as applying only to formula input, so the
# na.action argument looks redundant here — confirm before removing.
paf3 <- factanal(
  x = Nomiss.FullData,
  factors = 6,
  rotation = "none",
  na.action = na.omit
)
print(paf3)
##
## Call:
## factanal(x = Nomiss.FullData, factors = 6, na.action = na.omit, rotation = "none")
##
## Uniquenesses:
## X100.m Long.Jump Shotput High.Jump X400.m X110.m
## 0.165 0.404 0.005 0.412 0.293 0.005
## Discus Pole.vault Javelin X1500.m
## 0.453 0.633 0.631 0.005
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## X100.m 0.468 0.780
## Long.Jump 0.559 0.195 0.429 0.239
## Shotput 0.827 0.558
## High.Jump 0.486 0.270 0.162 0.483 -0.135
## X400.m 0.280 0.455 0.625 -0.120 0.116
## X110.m 0.808 0.203 -0.548
## Discus 0.635 0.368
## Pole.vault 0.325 0.234 0.380 0.230
## Javelin 0.374 0.236 0.404
## X1500.m -0.162 0.976 0.123
##
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings 2.859 1.371 1.219 0.830 0.461 0.255
## Proportion Var 0.286 0.137 0.122 0.083 0.046 0.025
## Cumulative Var 0.286 0.423 0.545 0.628 0.674 0.700
##
## The degrees of freedom for the model is 0 and the fit was 0.0111
# Show only the loadings table of the unrotated six-factor solution.
loadings(paf3)
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## X100.m 0.468 0.780
## Long.Jump 0.559 0.195 0.429 0.239
## Shotput 0.827 0.558
## High.Jump 0.486 0.270 0.162 0.483 -0.135
## X400.m 0.280 0.455 0.625 -0.120 0.116
## X110.m 0.808 0.203 -0.548
## Discus 0.635 0.368
## Pole.vault 0.325 0.234 0.380 0.230
## Javelin 0.374 0.236 0.404
## X1500.m -0.162 0.976 0.123
##
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings 2.859 1.371 1.219 0.830 0.461 0.255
## Proportion Var 0.286 0.137 0.122 0.083 0.046 0.025
## Cumulative Var 0.286 0.423 0.545 0.628 0.674 0.700
# Three-factor solution with an orthogonal (varimax) rotation. This prints the
# full summary; the loadings with small values suppressed are printed by a
# separate statement.
paf4 <- factanal(
  x = Nomiss.FullData,
  factors = 3,
  rotation = "varimax",
  na.action = na.omit
)
print(paf4)
##
## Call:
## factanal(x = Nomiss.FullData, factors = 3, na.action = na.omit, rotation = "varimax")
##
## Uniquenesses:
## X100.m Long.Jump Shotput High.Jump X400.m X110.m
## 0.005 0.484 0.149 0.656 0.339 0.694
## Discus Pole.vault Javelin X1500.m
## 0.397 0.778 0.767 0.511
##
## Loadings:
## Factor1 Factor2 Factor3
## X100.m 0.975 0.146 -0.151
## Long.Jump 0.586 0.354 0.218
## Shotput 0.215 0.895
## High.Jump 0.340 0.372 0.300
## X400.m 0.700 0.409
## X110.m 0.386 0.354 0.180
## Discus 0.170 0.757
## Pole.vault 0.212 0.278 0.315
## Javelin 0.473
## X1500.m 0.695
##
## Factor1 Factor2 Factor3
## SS loadings 2.174 2.092 0.953
## Proportion Var 0.217 0.209 0.095
## Cumulative Var 0.217 0.427 0.522
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 92.65 on 18 degrees of freedom.
## The p-value is 4.81e-12
# Loadings of the varimax solution: two decimals, loadings smaller than 0.3 in
# absolute value left blank, variables sorted by loading.
print(
  x = loadings(paf4),
  digits = 2,
  cutoff = 0.3,
  sort = TRUE
)
##
## Loadings:
## Factor1 Factor2 Factor3
## X100.m 0.98
## Long.Jump 0.59 0.35
## X400.m 0.70 0.41
## Shotput 0.89
## Discus 0.76
## X1500.m 0.69
## High.Jump 0.34 0.37 0.30
## X110.m 0.39 0.35
## Pole.vault 0.32
## Javelin 0.47
##
## Factor1 Factor2 Factor3
## SS loadings 2.17 2.09 0.95
## Proportion Var 0.22 0.21 0.10
## Cumulative Var 0.22 0.43 0.52
# Six-factor solution with an oblique (oblimin) rotation, which lets the
# factors correlate; the printed summary includes the factor correlations.
# NOTE(review): factanal() looks the rotation function up by name; "oblimin"
# is presumably the one provided by the GPArotation package loaded earlier.
paf5 <- factanal(
  x = Nomiss.FullData,
  factors = 6,
  rotation = "oblimin",
  na.action = na.omit
)
print(paf5)
##
## Call:
## factanal(x = Nomiss.FullData, factors = 6, na.action = na.omit, rotation = "oblimin")
##
## Uniquenesses:
## X100.m Long.Jump Shotput High.Jump X400.m X110.m
## 0.165 0.404 0.005 0.412 0.293 0.005
## Discus Pole.vault Javelin X1500.m
## 0.453 0.633 0.631 0.005
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## X100.m 0.872 -0.159
## Long.Jump 0.401 0.117 0.376
## Shotput 0.996
## High.Jump 0.679
## X400.m 0.735 0.292
## X110.m 1.001
## Discus 0.705
## Pole.vault 0.429 0.338
## Javelin 0.302 0.453
## X1500.m 0.996
##
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings 1.601 1.463 1.120 1.033 0.800 0.341
## Proportion Var 0.160 0.146 0.112 0.103 0.080 0.034
## Cumulative Var 0.160 0.306 0.418 0.522 0.602 0.636
##
## Factor Correlations:
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## Factor1 1.0000 -0.0967 -0.3629 -0.299 0.422 0.325
## Factor2 -0.0967 1.0000 -0.0187 -0.112 0.126 0.240
## Factor3 -0.3629 -0.0187 1.0000 0.416 -0.544 -0.214
## Factor4 -0.2994 -0.1119 0.4158 1.000 -0.402 -0.137
## Factor5 0.4216 0.1265 -0.5436 -0.402 1.000 0.222
## Factor6 0.3251 0.2401 -0.2138 -0.137 0.222 1.000
##
## The degrees of freedom for the model is 0 and the fit was 0.0111
# Show only the loadings table of the six-factor oblimin solution.
loadings(paf5)
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## X100.m 0.872 -0.159
## Long.Jump 0.401 0.117 0.376
## Shotput 0.996
## High.Jump 0.679
## X400.m 0.735 0.292
## X110.m 1.001
## Discus 0.705
## Pole.vault 0.429 0.338
## Javelin 0.302 0.453
## X1500.m 0.996
##
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings 1.601 1.463 1.120 1.033 0.800 0.341
## Proportion Var 0.160 0.146 0.112 0.103 0.080 0.034
## Cumulative Var 0.160 0.306 0.418 0.522 0.602 0.636
# Four-factor solution with an oblique (oblimin) rotation.
paf6 <- factanal(
  x = Nomiss.FullData,
  factors = 4,
  rotation = "oblimin",
  na.action = na.omit
)
# Loadings only: two decimals, loadings smaller than 0.3 in absolute value
# left blank, variables sorted by loading.
print(
  x = loadings(paf6),
  digits = 2,
  cutoff = 0.3,
  sort = TRUE
)
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## Shotput 1.00
## Discus 0.72
## Long.Jump 0.55 0.31
## High.Jump 0.69
## X110.m 0.65
## Pole.vault 0.59
## X100.m 0.87
## X400.m 0.71 0.33
## X1500.m 1.00
## Javelin 0.41
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 1.70 1.57 1.38 1.15
## Proportion Var 0.17 0.16 0.14 0.11
## Cumulative Var 0.17 0.33 0.47 0.58
Scree Plot
# Scree plot of the eigenvalues from the unrotated PCA, with explicit axis
# labels instead of the defaults derived from the plotted expression.
plot(
  pc1$values,
  type = "b",
  xlab = "Component number",
  ylab = "Eigenvalue",
  main = "Scree plot"
)

# Rerun the factor analysis with five factors and an orthogonal (varimax)
# rotation.
paf7 <- factanal(
  x = Nomiss.FullData,
  factors = 5,
  rotation = "varimax",
  na.action = na.omit
)
# Loadings only: two decimals, loadings smaller than 0.3 in absolute value
# left blank, variables sorted by loading.
print(
  x = loadings(paf7),
  digits = 2,
  cutoff = 0.3,
  sort = TRUE
)
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5
## Shotput 0.96
## Discus 0.69
## X100.m 0.82
## X400.m 0.76 0.33
## Long.Jump 0.48 0.58
## High.Jump 0.69
## X110.m 0.57
## Pole.vault 0.50
## X1500.m 0.99
## Javelin 0.41 0.40
##
## Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings 1.74 1.62 1.62 1.14 0.29
## Proportion Var 0.17 0.16 0.16 0.11 0.03
## Cumulative Var 0.17 0.34 0.50 0.61 0.64
# Five-factor solution again, this time with an oblique (oblimin) rotation.
paf8 <- factanal(
  x = Nomiss.FullData,
  factors = 5,
  rotation = "oblimin",
  na.action = na.omit
)
# Loadings only: two decimals, loadings smaller than 0.3 in absolute value
# left blank, variables sorted by loading.
print(
  x = loadings(paf8),
  digits = 2,
  cutoff = 0.3,
  sort = TRUE
)
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5
## Shotput 1.00
## Discus 0.71
## X100.m 0.82
## X400.m 0.79
## Long.Jump 0.35 0.52
## High.Jump 0.73
## X110.m 0.57
## Pole.vault 0.54
## X1500.m 0.99
## Javelin 0.36 0.39
##
## Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings 1.66 1.45 1.43 1.12 0.26
## Proportion Var 0.17 0.14 0.14 0.11 0.03
## Cumulative Var 0.17 0.31 0.45 0.57 0.59
Parallel Analysis
# Parallel analysis for both factors and components, run on the correlation
# matrix. The number of observations is taken from the complete-case data that
# produced `efacormatrix` rather than hard-coded, so it cannot drift from the
# data. Parallel analysis compares against simulated data, so the seed is
# fixed to make the suggested number of factors repeatable across runs.
set.seed(123)
fa.parallel(
  efacormatrix,
  n.obs = nrow(Nomiss.FullData), fm = "minres", fa = "both",
  main = "Parallel Analysis Scree Plots",
  n.iter = 200, error.bars = FALSE, se.bars = FALSE, SMC = FALSE,
  ylabel = NULL, show.legend = TRUE,
  sim = TRUE, quant = .95, plot = TRUE, correct = .5
)

## Parallel analysis suggests that the number of factors = 4 and the number of components = 2
# Rerun the factor analysis with four factors and an orthogonal (varimax)
# rotation.
paf9 <- factanal(
  x = Nomiss.FullData,
  factors = 4,
  rotation = "varimax",
  na.action = na.omit
)
# Loadings only: two decimals, loadings smaller than 0.3 in absolute value
# left blank, variables sorted by loading.
print(
  x = loadings(paf9),
  digits = 2,
  cutoff = 0.3,
  sort = TRUE
)
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## Shotput 0.96
## Discus 0.70
## X100.m 0.86
## X400.m 0.71 0.33
## Long.Jump 0.48 0.58
## High.Jump 0.63
## X110.m 0.59
## Pole.vault 0.51
## X1500.m 0.99
## Javelin 0.42
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 1.80 1.61 1.58 1.14
## Proportion Var 0.18 0.16 0.16 0.11
## Cumulative Var 0.18 0.34 0.50 0.61
Principal components analysis with an arbitrary number of components
# Unrotated principal components analysis retaining five components.
pc10 <- principal(FullData, nfactors = 5, rotate = "none")
# Print through the generic so S3 dispatch selects the psych method; calling
# print.psych() directly only works when the package exports that method.
# Loadings below .3 are hidden and items are sorted by loading.
print(pc10, cut = .3, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = FullData, nfactors = 5, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item PC1 PC2 PC3 PC4 PC5 h2 u2 com
## Long.Jump 2 0.79 0.70 0.296 1.3
## Shotput 3 0.70 -0.53 0.86 0.141 2.3
## X100.m 1 0.69 -0.52 0.86 0.145 2.4
## X110.m 6 0.69 -0.34 0.62 0.380 1.6
## High.Jump 4 0.67 -0.40 0.70 0.298 2.1
## Discus 7 0.62 -0.52 -0.36 0.85 0.149 3.0
## X400.m 5 0.62 0.55 0.42 0.88 0.118 2.9
## Pole.vault 8 0.54 0.41 -0.44 0.73 0.266 3.5
## X1500.m 10 0.60 0.66 0.95 0.054 2.8
## Javelin 9 0.43 -0.44 0.37 0.57 0.90 0.103 4.0
##
## PC1 PC2 PC3 PC4 PC5
## SS loadings 3.79 1.52 1.11 0.91 0.72
## Proportion Var 0.38 0.15 0.11 0.09 0.07
## Cumulative Var 0.38 0.53 0.64 0.73 0.81
## Proportion Explained 0.47 0.19 0.14 0.11 0.09
## Cumulative Proportion 0.47 0.66 0.80 0.91 1.00
##
## Mean item complexity = 2.6
## Test of the hypothesis that 5 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 98.93 with prob < 8.9e-20
##
## Fit based upon off diagonal values = 0.96
# Five-component principal components analysis with an orthogonal (varimax)
# rotation.
pc11 <- principal(FullData, nfactors = 5, rotate = "varimax")
# Print through the generic so S3 dispatch selects the psych method; calling
# print.psych() directly only works when the package exports that method.
# Loadings below .3 are hidden and items are sorted by loading.
print(pc11, cut = .3, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = FullData, nfactors = 5, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item RC1 RC4 RC2 RC3 RC5 h2 u2 com
## X100.m 1 0.88 0.86 0.145 1.2
## X400.m 5 0.83 0.42 0.88 0.118 1.6
## Long.Jump 2 0.62 0.52 0.70 0.296 2.2
## High.Jump 4 0.75 0.70 0.298 1.5
## Pole.vault 8 0.74 0.40 0.73 0.266 1.6
## X110.m 6 0.36 0.65 0.62 0.380 2.0
## Discus 7 0.90 0.85 0.149 1.1
## Shotput 3 0.85 0.86 0.141 1.4
## X1500.m 10 0.97 0.95 0.054 1.0
## Javelin 9 0.89 0.90 0.103 1.2
##
## RC1 RC4 RC2 RC3 RC5
## SS loadings 2.05 1.93 1.82 1.20 1.05
## Proportion Var 0.21 0.19 0.18 0.12 0.10
## Cumulative Var 0.21 0.40 0.58 0.70 0.81
## Proportion Explained 0.25 0.24 0.23 0.15 0.13
## Cumulative Proportion 0.25 0.49 0.72 0.87 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 5 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 98.93 with prob < 8.9e-20
##
## Fit based upon off diagonal values = 0.96