# Week-6-Factor-Analysis.knit

# Running Exploratory Factor Analysis in R Studio (3.5.3)
# Pin the CRAN mirror so that package installation does not need to prompt
# for one (e.g. while knitting).
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Packages to install
# Only install psych when it is not already available, so re-running or
# re-knitting the script does not download and reinstall it every time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}

## Installing package into 'C:/Users/mnava/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'psych' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\mnava\AppData\Local\Temp\RtmpGmiAje\downloaded_packages

# Only install GPArotation when it is not already available, so re-running or
# re-knitting the script does not download and reinstall it every time.
if (!requireNamespace("GPArotation", quietly = TRUE)) {
  install.packages("GPArotation")
}

## Installing package into 'C:/Users/mnava/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'GPArotation' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\mnava\AppData\Local\Temp\RtmpGmiAje\downloaded_packages

library("psych")

## Warning: package 'psych' was built under R version 4.5.2

library("GPArotation")

## Warning: package 'GPArotation' was built under R version 4.5.2

## 
## Attaching package: 'GPArotation'

## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

# Getting the data into R
# file.choose() opens an interactive file picker; the selected CSV is read
# with its first row used as the column names.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved constant.
Data <- read.csv(file.choose(), header = TRUE)
# NOTE(review): attach() places a copy of the current columns on the search
# path so they can be referenced by bare name. Columns added to Data after
# this point are not visible that way and must be accessed as Data$<name>.
attach(Data)
# List the column names that were read.
names(Data)

##  [1] "Obs"                           "Form.of.letter.of.application"
##  [3] "Appearance"                    "Academic.ability"             
##  [5] "Likeability"                   "Self.confidence"              
##  [7] "Lucidity"                      "Honesty"                      
##  [9] "Salesmanship"                  "Experience"                   
## [11] "Drive"                         "Ambition"                     
## [13] "Grasp"                         "Potential"                    
## [15] "Keeness.to.join"               "Suitability"

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# The 15 rated attributes used in the factor analysis (every column of Data
# except the "Obs" identifier). Defined once so the item data frame and the
# overall score are guaranteed to use the same set of columns.
ItemNames <- c(
  "Form.of.letter.of.application", "Appearance", "Academic.ability",
  "Likeability", "Self.confidence", "Lucidity", "Honesty", "Salesmanship",
  "Experience", "Drive", "Ambition", "Grasp", "Potential",
  "Keeness.to.join", "Suitability"
)
# Create data frame with variables
DataFrame <- Data[, ItemNames]
# Overall Raw Score - determine top 3
# Summed row-wise from the item data frame rather than from bare (attached)
# column names, so the result cannot be affected by other objects that happen
# to share a column's name. unname() drops the row-name labels rowSums() adds.
Data$OverallRaw <- unname(rowSums(DataFrame))
# Scree Plot
ScreePlot <- scree(DataFrame, factors = TRUE, main = "Scree plot", hline = NULL, add = FALSE)

# Eigenvalues / total variance explained
# Correlate the items first, then eigen-decompose that correlation matrix.
ItemCorrelations <- cor(DataFrame)
ev <- eigen(ItemCorrelations)
# Structural overview of the decomposition (lengths of values and vectors).
summary(ev)

##         Length Class  Mode   
## values   15    -none- numeric
## vectors 225    -none- numeric

# Show the eigenvalues of the item correlation matrix.
ev[["values"]]

##  [1] 7.51379418 2.05630117 1.45581948 1.19789771 0.73915262 0.49457907
##  [7] 0.35126183 0.30990202 0.25696154 0.18491037 0.15268036 0.09756308
## [13] 0.08881880 0.06463323 0.03572455

# Cumulative proportion of total variance explained.
# Divide by the sum of the eigenvalues (for a correlation matrix this equals
# the number of variables) instead of hard-coding the item count of 15, so the
# line stays correct if the set of items changes.
cumsum(ev$values) / sum(ev$values)

##  [1] 0.5009196 0.6380064 0.7350610 0.8149208 0.8641977 0.8971696 0.9205871
##  [8] 0.9412472 0.9583780 0.9707053 0.9808840 0.9873882 0.9933095 0.9976184
## [15] 1.0000000

# Running the Orthogonal EFA (4 factors, varimax rotation)
# stats::factanal() always fits by maximum likelihood and has no `fm`
# argument; the previous fm = "pa" was silently ignored, so it has been
# removed to avoid implying principal-axis factoring. The fitted model is
# unchanged. For principal-axis factoring, psych::fa(..., fm = "pa") would be
# the function to use instead.
ORTHOFA <- factanal(DataFrame, factors = 4, rotation = "varimax")
# Print loadings sorted by factor, hiding loadings below 0.1 in absolute value.
print(ORTHOFA, digits = 3, cutoff = 0.1, sort = TRUE)

## 
## Call:
## factanal(x = DataFrame, factors = 4, rotation = "varimax", fm = "pa")
## 
## Uniquenesses:
## Form.of.letter.of.application                    Appearance 
##                         0.443                         0.685 
##              Academic.ability                   Likeability 
##                         0.521                         0.185 
##               Self.confidence                      Lucidity 
##                         0.119                         0.198 
##                       Honesty                  Salesmanship 
##                         0.339                         0.138 
##                    Experience                         Drive 
##                         0.357                         0.226 
##                      Ambition                         Grasp 
##                         0.137                         0.153 
##                     Potential               Keeness.to.join 
##                         0.090                         0.005 
##                   Suitability 
##                         0.252 
## 
## Loadings:
##                               Factor1 Factor2 Factor3 Factor4
## Self.confidence                0.918           0.142         
## Lucidity                       0.838   0.111   0.291         
## Salesmanship                   0.885   0.258                 
## Drive                          0.767   0.389   0.172         
## Ambition                       0.904   0.181                 
## Grasp                          0.792   0.275   0.351   0.148 
## Potential                      0.735   0.349   0.432   0.247 
## Form.of.letter.of.application  0.129   0.717   0.113  -0.117 
## Experience                             0.778           0.165 
## Suitability                    0.364   0.770           0.142 
## Likeability                    0.231   0.239   0.838         
## Honesty                        0.252  -0.216   0.742         
## Academic.ability                       0.126           0.677 
## Keeness.to.join                0.424   0.389   0.554  -0.598 
## Appearance                     0.458   0.142   0.243   0.164 
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      5.570   2.473   2.099   1.013
## Proportion Var   0.371   0.165   0.140   0.068
## Cumulative Var   0.371   0.536   0.676   0.744
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 84 on 51 degrees of freedom.
## The p-value is 0.00247

# Create Ortho Factor Data Frames (to be used for reliability analysis)
# Each frame holds the items assigned to one factor of the orthogonal solution.
Ortho1_DataFrame <- Data[, c("Self.confidence", "Appearance", "Lucidity",
                             "Salesmanship", "Drive", "Ambition", "Grasp",
                             "Potential")]
Ortho2_DataFrame <- Data[, c("Form.of.letter.of.application", "Experience",
                             "Suitability")]
Ortho3_DataFrame <- Data[, c("Likeability", "Honesty")]
# NOTE(review): Academic.ability and Keeness.to.join load on Factor4 with
# opposite signs in the printed loadings, so their plain average mixes
# opposing directions - confirm whether one item should be reverse-scored.
Ortho4_DataFrame <- Data[, c("Academic.ability", "Keeness.to.join")]
# Create Ortho Factor Average Variables
# Averages are taken row-wise from the frames above, so each factor's item
# list is written only once and no bare (attached) column names are needed.
# unname() drops the row-name labels that rowMeans() attaches.
Data$OrthoFA1 <- unname(rowMeans(Ortho1_DataFrame))
Data$OrthoFA2 <- unname(rowMeans(Ortho2_DataFrame))
Data$OrthoFA3 <- unname(rowMeans(Ortho3_DataFrame))
Data$OrthoFA4 <- unname(rowMeans(Ortho4_DataFrame))
# Factor Reliability Analysis - only need to interpret the "Raw Alpha" value
alpha(Ortho1_DataFrame)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = Ortho1_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.95      0.95    0.96       0.7  19 0.0095  6.1 2.5     0.76
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.93  0.95  0.97
## Duhachek  0.93  0.95  0.97
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## Self.confidence      0.94      0.94    0.96      0.70  16   0.0109 0.0270  0.77
## Appearance           0.96      0.96    0.97      0.78  25   0.0086 0.0034  0.78
## Lucidity             0.94      0.94    0.95      0.69  16   0.0113 0.0239  0.75
## Salesmanship         0.94      0.94    0.95      0.68  15   0.0120 0.0266  0.72
## Drive                0.94      0.94    0.96      0.70  17   0.0107 0.0250  0.77
## Ambition             0.94      0.94    0.95      0.68  15   0.0117 0.0279  0.72
## Grasp                0.94      0.94    0.95      0.68  15   0.0116 0.0267  0.76
## Potential            0.94      0.94    0.95      0.69  15   0.0113 0.0282  0.76
## 
##  Item statistics 
##                  n raw.r std.r r.cor r.drop mean  sd
## Self.confidence 48  0.87  0.87  0.86   0.83  6.9 2.4
## Appearance      48  0.58  0.61  0.54   0.51  7.1 2.0
## Lucidity        48  0.90  0.89  0.89   0.86  6.3 3.2
## Salesmanship    48  0.92  0.92  0.91   0.89  4.9 3.4
## Drive           48  0.86  0.85  0.83   0.81  5.3 2.9
## Ambition        48  0.92  0.92  0.92   0.89  6.0 2.9
## Grasp           48  0.92  0.91  0.91   0.89  6.2 3.0
## Potential       48  0.90  0.89  0.89   0.86  5.7 3.2

# Reliability analysis for the items assigned to orthogonal factor 2.
alpha(Ortho2_DataFrame)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = Ortho2_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.82      0.82    0.76      0.61 4.7 0.043  5.4 2.7     0.59
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.71  0.82  0.89
## Duhachek  0.74  0.82  0.91
## 
##  Reliability if an item is dropped:
##                               raw_alpha std.alpha G6(smc) average_r S/N
## Form.of.letter.of.application      0.82      0.82    0.69      0.69 4.5
## Experience                         0.73      0.74    0.59      0.59 2.8
## Suitability                        0.70      0.71    0.55      0.55 2.4
##                               alpha se var.r med.r
## Form.of.letter.of.application    0.052    NA  0.69
## Experience                       0.076    NA  0.59
## Suitability                      0.084    NA  0.55
## 
##  Item statistics 
##                                n raw.r std.r r.cor r.drop mean  sd
## Form.of.letter.of.application 48  0.80  0.83  0.67   0.62  6.0 2.7
## Experience                    48  0.88  0.87  0.78   0.70  4.2 3.3
## Suitability                   48  0.89  0.88  0.81   0.73  6.0 3.3

# Reliability analysis for the items assigned to orthogonal factor 3.
alpha(Ortho3_DataFrame)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = Ortho3_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.78      0.78    0.65      0.65 3.6 0.062  7.1 2.4     0.65
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.61  0.78  0.88
## Duhachek  0.66  0.78  0.90
## 
##  Reliability if an item is dropped:
##             raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Likeability      0.71      0.65    0.42      0.65 1.8       NA     0  0.65
## Honesty          0.58      0.65    0.42      0.65 1.8       NA     0  0.65
## 
##  Item statistics 
##              n raw.r std.r r.cor r.drop mean  sd
## Likeability 48  0.92  0.91  0.73   0.65  6.1 2.8
## Honesty     48  0.90  0.91  0.73   0.65  8.0 2.5

# Reliability analysis for the items assigned to orthogonal factor 4.
# NOTE(review): the recorded output warns that Academic.ability is negatively
# correlated with the first principal component and reports a negative raw
# alpha; the warning itself suggests re-running with check.keys = TRUE.
# Confirm whether reverse-keying is intended before interpreting this factor.
alpha(Ortho4_DataFrame)

## Warning in alpha(Ortho4_DataFrame): Some items were negatively correlated with the first principal component and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

## Some items ( Academic.ability ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

## Warning in sqrt(Vtc): NaNs produced

## 
## Reliability analysis   
## Call: alpha(x = Ortho4_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r   S/N  ase mean  sd median_r
##       -0.9     -0.96   -0.32     -0.32 -0.49 0.52  6.3 1.4    -0.32
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt    -2.39  -0.9 -0.06
## Duhachek -1.93  -0.9  0.13
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r   S/N alpha se var.r
## Academic.ability     -0.24     -0.32     0.1     -0.32 -0.24       NA     0
## Keeness.to.join      -0.43     -0.32     0.1     -0.32 -0.24       NA     0
##                  med.r
## Academic.ability -0.32
## Keeness.to.join  -0.32
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean  sd
## Academic.ability 48  0.41  0.58   NaN  -0.32  7.1 2.0
## Keeness.to.join  48  0.73  0.58   NaN  -0.32  5.6 2.7
## 
## Non missing response frequency for each item
##                     0    2    3    4    5    6    7    8    9   10 miss
## Academic.ability 0.00 0.02 0.02 0.06 0.12 0.10 0.23 0.21 0.08 0.15    0
## Keeness.to.join  0.08 0.02 0.08 0.08 0.25 0.15 0.08 0.12 0.02 0.10    0

# Ortho Overall Score: sum of the four orthogonal factor averages.
Data$Ortho_Overall <- Data$OrthoFA1 + Data$OrthoFA2 + Data$OrthoFA3 + Data$OrthoFA4
# Running the Oblique EFA (4 factors, oblimin rotation)
# stats::factanal() always fits by maximum likelihood and has no `fm`
# argument; the previous fm = "pa" was silently ignored, so it has been
# removed to avoid implying principal-axis factoring. The fitted model is
# unchanged. factanal() looks the rotation up by function name, so the
# package providing oblimin() (GPArotation) must be loaded.
OBLIQFA <- factanal(DataFrame, factors = 4, rotation = "oblimin")
# Print loadings sorted by factor, hiding loadings below 0.1 in absolute value.
print(OBLIQFA, digits = 3, cutoff = 0.1, sort = TRUE)

## 
## Call:
## factanal(x = DataFrame, factors = 4, rotation = "oblimin", fm = "pa")
## 
## Uniquenesses:
## Form.of.letter.of.application                    Appearance 
##                         0.443                         0.685 
##              Academic.ability                   Likeability 
##                         0.521                         0.185 
##               Self.confidence                      Lucidity 
##                         0.119                         0.198 
##                       Honesty                  Salesmanship 
##                         0.339                         0.138 
##                    Experience                         Drive 
##                         0.357                         0.226 
##                      Ambition                         Grasp 
##                         0.137                         0.153 
##                     Potential               Keeness.to.join 
##                         0.090                         0.005 
##                   Suitability 
##                         0.252 
## 
## Loadings:
##                               Factor1 Factor2 Factor3 Factor4
## Self.confidence                1.004  -0.260                 
## Lucidity                       0.824           0.161         
## Salesmanship                   0.907   0.127                 
## Drive                          0.725   0.272                 
## Ambition                       0.944                         
## Grasp                          0.718   0.158   0.250   0.129 
## Potential                      0.614   0.248   0.369   0.215 
## Form.of.letter.of.application          0.704          -0.210 
## Experience                             0.824                 
## Suitability                    0.231   0.764                 
## Likeability                            0.129   0.870         
## Honesty                               -0.336   0.774         
## Academic.ability                       0.203   0.104   0.668 
## Keeness.to.join                0.226   0.218   0.412  -0.654 
## Appearance                     0.406           0.204   0.155 
## 
##             Factor1 Factor2 Factor3 Factor4
## SS loadings   5.107    2.23    1.83   1.046
## 
## Factor Correlations:
##         Factor1 Factor2 Factor3 Factor4
## Factor1   1.000  0.3385   0.469 -0.1241
## Factor2   0.338  1.0000   0.213 -0.0384
## Factor3   0.469  0.2135   1.000 -0.1710
## Factor4  -0.124 -0.0384  -0.171  1.0000
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 84 on 51 degrees of freedom.
## The p-value is 0.00247

# Create Oblique Factor Data Frames (to be used for reliability analysis)
# Each frame holds the items assigned to one factor of the oblique solution.
Obliq1_DataFrame <- Data[, c("Self.confidence", "Lucidity", "Salesmanship",
                             "Drive", "Ambition", "Grasp", "Potential",
                             "Appearance")]
Obliq2_DataFrame <- Data[, c("Form.of.letter.of.application", "Experience",
                             "Suitability")]
Obliq3_DataFrame <- Data[, c("Likeability", "Honesty")]
# NOTE(review): Academic.ability and Keeness.to.join load on Factor4 with
# opposite signs in the printed loadings, so their plain average mixes
# opposing directions - confirm whether one item should be reverse-scored.
Obliq4_DataFrame <- Data[, c("Academic.ability", "Keeness.to.join")]
# Create Oblique Factor Average Variables
# Averages are taken row-wise from the frames above, so each factor's item
# list is written only once and no bare (attached) column names are needed.
# unname() drops the row-name labels that rowMeans() attaches.
Data$ObliqFA1 <- unname(rowMeans(Obliq1_DataFrame))
Data$ObliqFA2 <- unname(rowMeans(Obliq2_DataFrame))
Data$ObliqFA3 <- unname(rowMeans(Obliq3_DataFrame))
Data$ObliqFA4 <- unname(rowMeans(Obliq4_DataFrame))
# Factor Reliability Analysis - only need to interpret the "Raw Alpha" value
alpha(Obliq1_DataFrame)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = Obliq1_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.95      0.95    0.96       0.7  19 0.0095  6.1 2.5     0.76
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.93  0.95  0.97
## Duhachek  0.93  0.95  0.97
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## Self.confidence      0.94      0.94    0.96      0.70  16   0.0109 0.0270  0.77
## Lucidity             0.94      0.94    0.95      0.69  16   0.0113 0.0239  0.75
## Salesmanship         0.94      0.94    0.95      0.68  15   0.0120 0.0266  0.72
## Drive                0.94      0.94    0.96      0.70  17   0.0107 0.0250  0.77
## Ambition             0.94      0.94    0.95      0.68  15   0.0117 0.0279  0.72
## Grasp                0.94      0.94    0.95      0.68  15   0.0116 0.0267  0.76
## Potential            0.94      0.94    0.95      0.69  15   0.0113 0.0282  0.76
## Appearance           0.96      0.96    0.97      0.78  25   0.0086 0.0034  0.78
## 
##  Item statistics 
##                  n raw.r std.r r.cor r.drop mean  sd
## Self.confidence 48  0.87  0.87  0.86   0.83  6.9 2.4
## Lucidity        48  0.90  0.89  0.89   0.86  6.3 3.2
## Salesmanship    48  0.92  0.92  0.91   0.89  4.9 3.4
## Drive           48  0.86  0.85  0.83   0.81  5.3 2.9
## Ambition        48  0.92  0.92  0.92   0.89  6.0 2.9
## Grasp           48  0.92  0.91  0.91   0.89  6.2 3.0
## Potential       48  0.90  0.89  0.89   0.86  5.7 3.2
## Appearance      48  0.58  0.61  0.54   0.51  7.1 2.0

# Reliability analysis for the items assigned to oblique factor 2.
alpha(Obliq2_DataFrame)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = Obliq2_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.82      0.82    0.76      0.61 4.7 0.043  5.4 2.7     0.59
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.71  0.82  0.89
## Duhachek  0.74  0.82  0.91
## 
##  Reliability if an item is dropped:
##                               raw_alpha std.alpha G6(smc) average_r S/N
## Form.of.letter.of.application      0.82      0.82    0.69      0.69 4.5
## Experience                         0.73      0.74    0.59      0.59 2.8
## Suitability                        0.70      0.71    0.55      0.55 2.4
##                               alpha se var.r med.r
## Form.of.letter.of.application    0.052    NA  0.69
## Experience                       0.076    NA  0.59
## Suitability                      0.084    NA  0.55
## 
##  Item statistics 
##                                n raw.r std.r r.cor r.drop mean  sd
## Form.of.letter.of.application 48  0.80  0.83  0.67   0.62  6.0 2.7
## Experience                    48  0.88  0.87  0.78   0.70  4.2 3.3
## Suitability                   48  0.89  0.88  0.81   0.73  6.0 3.3

# Reliability analysis for the items assigned to oblique factor 3.
alpha(Obliq3_DataFrame)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: alpha(x = Obliq3_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.78      0.78    0.65      0.65 3.6 0.062  7.1 2.4     0.65
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.61  0.78  0.88
## Duhachek  0.66  0.78  0.90
## 
##  Reliability if an item is dropped:
##             raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Likeability      0.71      0.65    0.42      0.65 1.8       NA     0  0.65
## Honesty          0.58      0.65    0.42      0.65 1.8       NA     0  0.65
## 
##  Item statistics 
##              n raw.r std.r r.cor r.drop mean  sd
## Likeability 48  0.92  0.91  0.73   0.65  6.1 2.8
## Honesty     48  0.90  0.91  0.73   0.65  8.0 2.5

# Reliability analysis for the items assigned to oblique factor 4.
# NOTE(review): the recorded output warns that Academic.ability is negatively
# correlated with the first principal component and reports a negative raw
# alpha; the warning itself suggests re-running with check.keys = TRUE.
# Confirm whether reverse-keying is intended before interpreting this factor.
alpha(Obliq4_DataFrame)

## Warning in alpha(Obliq4_DataFrame): Some items were negatively correlated with the first principal component and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

## Some items ( Academic.ability ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

## Warning in sqrt(Vtc): NaNs produced

## 
## Reliability analysis   
## Call: alpha(x = Obliq4_DataFrame)
## 
##   raw_alpha std.alpha G6(smc) average_r   S/N  ase mean  sd median_r
##       -0.9     -0.96   -0.32     -0.32 -0.49 0.52  6.3 1.4    -0.32
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt    -2.39  -0.9 -0.06
## Duhachek -1.93  -0.9  0.13
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r   S/N alpha se var.r
## Academic.ability     -0.24     -0.32     0.1     -0.32 -0.24       NA     0
## Keeness.to.join      -0.43     -0.32     0.1     -0.32 -0.24       NA     0
##                  med.r
## Academic.ability -0.32
## Keeness.to.join  -0.32
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean  sd
## Academic.ability 48  0.41  0.58   NaN  -0.32  7.1 2.0
## Keeness.to.join  48  0.73  0.58   NaN  -0.32  5.6 2.7
## 
## Non missing response frequency for each item
##                     0    2    3    4    5    6    7    8    9   10 miss
## Academic.ability 0.00 0.02 0.02 0.06 0.12 0.10 0.23 0.21 0.08 0.15    0
## Keeness.to.join  0.08 0.02 0.08 0.08 0.25 0.15 0.08 0.12 0.02 0.10    0

# Oblique Overall Score: total of the four oblique factor averages.
# with() resolves the factor-average columns inside Data itself.
Data$Obliq_Overall_Score <- with(Data, ObliqFA1 + ObliqFA2 + ObliqFA3 + ObliqFA4)