요인 분석


참고 논문양식 : 이주성(2016). 경영학전공 대학생의 전공만족도와 진로결정 자기효능감이 진로준비행동에 미치는 영향


패키지 불러오기
# echo=TRUE : 코드를 결과 문서에 함께 출력한다
# eval=TRUE : 코드를 실행(평가)한다
# message=FALSE : 메시지를 출력하지 않는다
# warning=TRUE : 경고메시지를 출력한다 
# error=FALSE : 오류가 발생하면 실행을 중단한다 (TRUE이면 오류메시지를 출력하고 계속 진행한다)
# tidy=FALSE : 코드 형태를 깔끔하게 재정렬하지 않는다 
# 그림 삽입
# {r echo=FALSE, out.width='50%'}
# knitr::include_graphics('./pic.png')

library(readxl)
library(psych)
library(jmv)

데이터 불러오기
# Load the merged survey workbook into `edu` and inspect its structure.
# NOTE(review): setwd() pins the script to one machine's absolute path; it is
# kept unchanged here because later relative paths may depend on it.
setwd(dir = "C:/Users/user/Desktop/krivet")
edu <- read_excel(path = "data_merge_3_na.xlsx")
str(object = edu)
## Classes 'tbl_df', 'tbl' and 'data.frame':    312 obs. of  22 variables:
##  $ satisfaction55: num  4 4 4 5 4 4 4 4 2 3 ...
##  $ facilities79_1: num  4 4 4 4 3 1 4 3 1 3 ...
##  $ facilities80_2: num  4 4 4 2 3 2 3 4 1 3 ...
##  $ facilities81_3: num  4 4 5 3 3 3 3 4 1 3 ...
##  $ facilities82_4: num  4 4 5 3 3 2 3 4 1 3 ...
##  $ ralation84_1  : num  4 4 4 3 3 1 3 4 1 3 ...
##  $ pride85_1     : num  4 4 4 3 4 3 4 4 1 3 ...
##  $ pride86_2     : num  4 4 5 3 4 3 4 4 1 3 ...
##  $ schoollife87_1: num  4 4 4 3 4 4 2 4 1 3 ...
##  $ schoollife88_2: num  4 4 4 3 3 1 4 4 1 3 ...
##  $ schoollife89_3: num  3 4 4 3 3 2 4 4 1 3 ...
##  $ schoollife90_4: num  4 4 3 3 4 3 4 4 1 3 ...
##  $ schoollife91_5: num  4 4 4 3 4 4 4 4 1 3 ...
##  $ schoollife92_6: num  4 4 4 3 4 4 4 4 3 4 ...
##  $ schoollife93_7: num  4 4 4 1 3 3 3 4 1 3 ...
##  $ class94_1     : num  4 4 4 3 4 4 4 4 3 3 ...
##  $ class95_2     : num  4 4 4 3 4 4 4 4 2 3 ...
##  $ class96_3     : num  4 4 4 3 4 3 4 4 2 3 ...
##  $ class97_4     : num  4 4 4 3 4 3 4 4 2 4 ...
##  $ class98_5     : num  4 4 4 3 4 5 4 4 3 4 ...
##  $ class99_6     : num  4 4 4 3 4 5 3 4 3 4 ...
##  $ prof102_1     : num  1 1 2 2 1 1 1 1 1 1 ...
# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = edu)
##  satisfaction55  facilities79_1  facilities80_2  facilities81_3 
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000  
##  Mean   :3.872   Mean   :3.583   Mean   :3.506   Mean   :3.609  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  facilities82_4   ralation84_1     pride85_1       pride86_2    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :4.000   Median :3.000   Median :4.000  
##  Mean   :3.622   Mean   :3.712   Mean   :3.465   Mean   :3.679  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  schoollife87_1  schoollife88_2  schoollife89_3  schoollife90_4 
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :4.000  
##  Mean   :3.359   Mean   :3.442   Mean   :3.279   Mean   :3.583  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  schoollife91_5  schoollife92_6  schoollife93_7    class94_1    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:3.000  
##  Median :3.000   Median :4.000   Median :3.000   Median :4.000  
##  Mean   :3.388   Mean   :3.619   Mean   :2.955   Mean   :3.766  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##    class95_2       class96_3       class97_4       class98_5    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000  
##  Mean   :3.756   Mean   :3.846   Mean   :3.798   Mean   :3.715  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##    class99_6       prof102_1    
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:1.000  
##  Median :4.000   Median :2.000  
##  Mean   :3.827   Mean   :1.721  
##  3rd Qu.:4.000   3rd Qu.:2.000  
##  Max.   :5.000   Max.   :5.000
# Convert the dependent variable to a factor.
edu[["satisfaction55"]] <- as.factor(edu[["satisfaction55"]])

설문문항의 타당도와 신뢰도 검증

탐색적 요인분석 1
# KMO and Bartlett checks for the factor analysis.
# They indicate whether factor analysis is appropriate for these items.
# KMO takes a value between 0 and 1 and compares the overall correlations
# with the partial correlations; it should be at least 0.6.

# Drop the dependent variable by name rather than by position, so the item
# set stays correct even if the column order of `edu` changes.
edu_kmo <- edu[, names(edu) != "satisfaction55"]
str(edu_kmo)   # edu_kmo_cor <- cor(edu_kmo)
## Classes 'tbl_df', 'tbl' and 'data.frame':    312 obs. of  21 variables:
##  $ facilities79_1: num  4 4 4 4 3 1 4 3 1 3 ...
##  $ facilities80_2: num  4 4 4 2 3 2 3 4 1 3 ...
##  $ facilities81_3: num  4 4 5 3 3 3 3 4 1 3 ...
##  $ facilities82_4: num  4 4 5 3 3 2 3 4 1 3 ...
##  $ ralation84_1  : num  4 4 4 3 3 1 3 4 1 3 ...
##  $ pride85_1     : num  4 4 4 3 4 3 4 4 1 3 ...
##  $ pride86_2     : num  4 4 5 3 4 3 4 4 1 3 ...
##  $ schoollife87_1: num  4 4 4 3 4 4 2 4 1 3 ...
##  $ schoollife88_2: num  4 4 4 3 3 1 4 4 1 3 ...
##  $ schoollife89_3: num  3 4 4 3 3 2 4 4 1 3 ...
##  $ schoollife90_4: num  4 4 3 3 4 3 4 4 1 3 ...
##  $ schoollife91_5: num  4 4 4 3 4 4 4 4 1 3 ...
##  $ schoollife92_6: num  4 4 4 3 4 4 4 4 3 4 ...
##  $ schoollife93_7: num  4 4 4 1 3 3 3 4 1 3 ...
##  $ class94_1     : num  4 4 4 3 4 4 4 4 3 3 ...
##  $ class95_2     : num  4 4 4 3 4 4 4 4 2 3 ...
##  $ class96_3     : num  4 4 4 3 4 3 4 4 2 3 ...
##  $ class97_4     : num  4 4 4 3 4 3 4 4 2 4 ...
##  $ class98_5     : num  4 4 4 3 4 5 4 4 3 4 ...
##  $ class99_6     : num  4 4 4 3 4 5 3 4 3 4 ...
##  $ prof102_1     : num  1 1 2 2 1 1 1 1 1 1 ...
# Kaiser-Meyer-Olkin sampling adequacy for every item in `edu_kmo`.
KMO(r = edu_kmo)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = edu_kmo)
## Overall MSA =  0.92
## MSA for each item = 
## facilities79_1 facilities80_2 facilities81_3 facilities82_4   ralation84_1 
##           0.92           0.87           0.88           0.89           0.93 
##      pride85_1      pride86_2 schoollife87_1 schoollife88_2 schoollife89_3 
##           0.90           0.90           0.92           0.87           0.94 
## schoollife90_4 schoollife91_5 schoollife92_6 schoollife93_7      class94_1 
##           0.96           0.95           0.93           0.93           0.92 
##      class95_2      class96_3      class97_4      class98_5      class99_6 
##           0.94           0.94           0.94           0.92           0.93 
##      prof102_1 
##           0.85
# Bartlett's test of sphericity on the item correlation matrix, which is the
# Bartlett test that belongs with KMO before a factor analysis.
# The previous call, stats::bartlett.test(), is the homogeneity-of-variances
# test across columns and does not test sphericity.
# NOTE(review): the rendered `##` output that follows this line was produced
# by the old bartlett.test() call; re-knit to regenerate it.
cortest.bartlett(cor(edu_kmo), n = nrow(edu_kmo))
## 
##  Bartlett test of homogeneity of variances
## 
## data:  edu_kmo
## Bartlett's K-squared = 125.2, df = 20, p-value < 2.2e-16
# Alternative extraction kept for reference (fm = "pa" is principal axis
# factoring):
# fa <- fa(edu_kmo, 4, fm = "pa")

# Weighted least squares (fm = "wls") with 5 factors. Items loading at least
# 0.6 on the same factor are grouped together as one factor.
fa <- fa(r = edu_kmo, nfactors = 5, fm = "wls")

# This study settles on 4 factors, decided by comparing against the efa()
# results further below.
fa <- fa(r = edu_kmo, nfactors = 4, fm = "wls")

str(object = edu_kmo)
## Classes 'tbl_df', 'tbl' and 'data.frame':    312 obs. of  21 variables:
##  $ facilities79_1: num  4 4 4 4 3 1 4 3 1 3 ...
##  $ facilities80_2: num  4 4 4 2 3 2 3 4 1 3 ...
##  $ facilities81_3: num  4 4 5 3 3 3 3 4 1 3 ...
##  $ facilities82_4: num  4 4 5 3 3 2 3 4 1 3 ...
##  $ ralation84_1  : num  4 4 4 3 3 1 3 4 1 3 ...
##  $ pride85_1     : num  4 4 4 3 4 3 4 4 1 3 ...
##  $ pride86_2     : num  4 4 5 3 4 3 4 4 1 3 ...
##  $ schoollife87_1: num  4 4 4 3 4 4 2 4 1 3 ...
##  $ schoollife88_2: num  4 4 4 3 3 1 4 4 1 3 ...
##  $ schoollife89_3: num  3 4 4 3 3 2 4 4 1 3 ...
##  $ schoollife90_4: num  4 4 3 3 4 3 4 4 1 3 ...
##  $ schoollife91_5: num  4 4 4 3 4 4 4 4 1 3 ...
##  $ schoollife92_6: num  4 4 4 3 4 4 4 4 3 4 ...
##  $ schoollife93_7: num  4 4 4 1 3 3 3 4 1 3 ...
##  $ class94_1     : num  4 4 4 3 4 4 4 4 3 3 ...
##  $ class95_2     : num  4 4 4 3 4 4 4 4 2 3 ...
##  $ class96_3     : num  4 4 4 3 4 3 4 4 2 3 ...
##  $ class97_4     : num  4 4 4 3 4 3 4 4 2 4 ...
##  $ class98_5     : num  4 4 4 3 4 5 4 4 3 4 ...
##  $ class99_6     : num  4 4 4 3 4 5 3 4 3 4 ...
##  $ prof102_1     : num  1 1 2 2 1 1 1 1 1 1 ...
# Keep only the 14 items retained for the final factor solution
# (facilities, pride, school life, class).
edu_kmo_1 <- edu_kmo[c(
  "facilities80_2", "facilities81_3", "facilities82_4",
  "pride85_1", "pride86_2",
  "schoollife87_1", "schoollife88_2", "schoollife89_3",
  "class94_1", "class95_2", "class96_3",
  "class97_4", "class98_5", "class99_6"
)]

# KMO on the retained items; 0.6 or higher is the usual threshold.
KMO(r = edu_kmo_1)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = edu_kmo_1)
## Overall MSA =  0.9
## MSA for each item = 
## facilities80_2 facilities81_3 facilities82_4      pride85_1      pride86_2 
##           0.87           0.83           0.88           0.88           0.89 
## schoollife87_1 schoollife88_2 schoollife89_3      class94_1      class95_2 
##           0.90           0.85           0.92           0.91           0.93 
##      class96_3      class97_4      class98_5      class99_6 
##           0.92           0.92           0.91           0.90
# Bartlett's test of sphericity on the retained items (statistic + p-value).
# The previous call, stats::bartlett.test(), is the homogeneity-of-variances
# test across columns and does not test sphericity.
# NOTE(review): the rendered `##` output that follows this line was produced
# by the old bartlett.test() call; re-knit to regenerate it.
cortest.bartlett(cor(edu_kmo_1), n = nrow(edu_kmo_1))
## 
##  Bartlett test of homogeneity of variances
## 
## data:  edu_kmo_1
## Bartlett's K-squared = 81.441, df = 13, p-value = 5.906e-12
# Earlier WLS variant, kept for reference:
# fa <- fa(edu_kmo_1, 4, fm = "wls")

# Final model on the retained items: 4 factors, principal axis factoring.
fa <- fa(r = edu_kmo_1, nfactors = 4, fm = "pa")

# Printing shows the loadings plus the SS loadings and cumulative variance
# for each factor.
print(fa)
## Factor Analysis using method =  pa
## Call: fa(r = edu_kmo_1, nfactors = 4, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  PA1   PA2   PA3   PA4   h2   u2 com
## facilities80_2  0.01  0.78  0.03 -0.05 0.61 0.39 1.0
## facilities81_3 -0.01  0.84 -0.08  0.09 0.71 0.29 1.0
## facilities82_4  0.04  0.70  0.08 -0.03 0.55 0.45 1.0
## pride85_1      -0.03  0.04  0.06  0.87 0.82 0.18 1.0
## pride86_2       0.22  0.00  0.07  0.61 0.62 0.38 1.3
## schoollife87_1  0.03  0.05  0.67  0.13 0.61 0.39 1.1
## schoollife88_2  0.01 -0.03  0.93  0.01 0.87 0.13 1.0
## schoollife89_3  0.05  0.12  0.61  0.08 0.54 0.46 1.1
## class94_1       0.92 -0.05 -0.15  0.10 0.78 0.22 1.1
## class95_2       0.77  0.02  0.02  0.04 0.66 0.34 1.0
## class96_3       0.79  0.01  0.11 -0.05 0.69 0.31 1.0
## class97_4       0.74  0.08  0.17 -0.12 0.66 0.34 1.2
## class98_5       0.72  0.11 -0.01  0.01 0.59 0.41 1.0
## class99_6       0.73  0.00  0.09  0.04 0.64 0.36 1.0
## 
##                        PA1  PA2  PA3  PA4
## SS loadings           3.96 1.98 2.03 1.40
## Proportion Var        0.28 0.14 0.15 0.10
## Cumulative Var        0.28 0.42 0.57 0.67
## Proportion Explained  0.42 0.21 0.22 0.15
## Cumulative Proportion 0.42 0.63 0.85 1.00
## 
##  With factor correlations of 
##      PA1  PA2  PA3  PA4
## PA1 1.00 0.44 0.53 0.55
## PA2 0.44 1.00 0.39 0.37
## PA3 0.53 0.39 1.00 0.49
## PA4 0.55 0.37 0.49 1.00
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  91  and the objective function was  8.93 with Chi Square of  2729.58
## The degrees of freedom for the model are 41  and the objective function was  0.47 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  312 with the empirical chi square  35.35  with prob <  0.72 
## The total number of observations was  312  with Likelihood Chi Square =  142.29  with prob <  4.2e-13 
## 
## Tucker Lewis Index of factoring reliability =  0.914
## RMSEA index =  0.091  and the 90 % confidence intervals are  0.073 0.105
## BIC =  -93.18
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2  PA3  PA4
## Correlation of (regression) scores with factors   0.96 0.92 0.95 0.93
## Multiple R square of scores with factors          0.93 0.85 0.90 0.86
## Minimum correlation of possible factor scores     0.86 0.69 0.81 0.71
# Plot the fitted factor solution.
plot(fa)

# Diagram of which items load on which factor.
fa.diagram(fa)

# Cross-check with stats::factanal(): 4 factors, varimax rotation,
# regression-method factor scores.
factanal(
  x = edu_kmo_1,
  factors = 4,
  scores = "regression",
  rotation = "varimax"
)
## 
## Call:
## factanal(x = edu_kmo_1, factors = 4, scores = "regression", rotation = "varimax")
## 
## Uniquenesses:
## facilities80_2 facilities81_3 facilities82_4      pride85_1      pride86_2 
##          0.398          0.289          0.443          0.107          0.407 
## schoollife87_1 schoollife88_2 schoollife89_3      class94_1      class95_2 
##          0.389          0.121          0.473          0.207          0.316 
##      class96_3      class97_4      class98_5      class99_6 
##          0.297          0.349          0.423          0.384 
## 
## Loadings:
##                Factor1 Factor2 Factor3 Factor4
## facilities80_2 0.169   0.144   0.741          
## facilities81_3 0.177           0.803   0.161  
## facilities82_4 0.194   0.208   0.686          
## pride85_1      0.258   0.288   0.180   0.843  
## pride86_2      0.405   0.273   0.155   0.575  
## schoollife87_1 0.263   0.676   0.189   0.222  
## schoollife88_2 0.260   0.879   0.123   0.155  
## schoollife89_3 0.252   0.610   0.236   0.188  
## class94_1      0.846           0.110   0.238  
## class95_2      0.758   0.209   0.171   0.190  
## class96_3      0.764   0.271   0.167   0.132  
## class97_4      0.705   0.314   0.228          
## class98_5      0.682   0.158   0.243   0.169  
## class99_6      0.696   0.274   0.160   0.174  
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      3.849   2.154   2.033   1.360
## Proportion Var   0.275   0.154   0.145   0.097
## Cumulative Var   0.275   0.429   0.574   0.671
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 139.72 on 41 degrees of freedom.
## The p-value is 1.07e-12

탐색적 요인분석 2
# efa() comes from the jmv package (library(jmv)).
# Exploratory factor analysis over all 21 candidate items.
efa(
  data = edu,
  vars = c(
    "facilities79_1", "facilities80_2", "facilities81_3", "facilities82_4",
    "ralation84_1",
    "pride85_1", "pride86_2",
    "schoollife87_1", "schoollife88_2", "schoollife89_3", "schoollife90_4",
    "schoollife91_5", "schoollife92_6", "schoollife93_7",
    "class94_1", "class95_2", "class96_3",
    "class97_4", "class98_5", "class99_6",
    "prof102_1"
  )
)
## 
##  EXPLORATORY FACTOR ANALYSIS
## 
##  Factor Loadings                                                               
##  ───────────────────────────────────────────────────────────────────────────── 
##                      1        2        3        4        5        Uniqueness   
##  ───────────────────────────────────────────────────────────────────────────── 
##    facilities79_1    0.566                                             0.488   
##    facilities80_2    0.857                                             0.296   
##    facilities81_3    0.804                                             0.352   
##    facilities82_4    0.709                                             0.470   
##    ralation84_1                                                        0.614   
##    pride85_1                                             0.888         0.217   
##    pride86_2                                             0.692         0.338   
##    schoollife87_1             0.669                                    0.385   
##    schoollife88_2             0.806                                    0.249   
##    schoollife89_3             0.693                                    0.378   
##    schoollife90_4             0.464                                    0.444   
##    schoollife91_5                                                      0.518   
##    schoollife92_6                               0.710                  0.377   
##    schoollife93_7             0.361             0.350                  0.744   
##    class94_1                           0.653                           0.235   
##    class95_2                           0.581                           0.340   
##    class96_3                           0.837                           0.212   
##    class97_4                           0.645                           0.329   
##    class98_5                                    0.645                  0.281   
##    class99_6                           0.336    0.452                  0.343   
##    prof102_1                                                           0.909   
##  ───────────────────────────────────────────────────────────────────────────── 
##    Note. 'oblimin' rotation was used
# Excluded items: "facilities79_1", "schoollife93_7", "schoollife90_4",
# "ralation84_1", "prof102_1", "schoollife91_5", "schoollife92_6".
# Exploratory factor analysis on the 14 remaining items.
efa(
  data = edu,
  vars = c(
    "facilities80_2", "facilities81_3", "facilities82_4",
    "pride85_1", "pride86_2",
    "schoollife87_1", "schoollife88_2", "schoollife89_3",
    "class94_1", "class95_2", "class96_3",
    "class97_4", "class98_5", "class99_6"
  )
)
## 
##  EXPLORATORY FACTOR ANALYSIS
## 
##  Factor Loadings                                                      
##  ──────────────────────────────────────────────────────────────────── 
##                      1        2        3        4        Uniqueness   
##  ──────────────────────────────────────────────────────────────────── 
##    facilities80_2             0.780                           0.393   
##    facilities81_3             0.842                           0.289   
##    facilities82_4             0.705                           0.446   
##    pride85_1                                    0.906         0.124   
##    pride86_2                                    0.573         0.399   
##    schoollife87_1                      0.671                  0.386   
##    schoollife88_2                      0.932                  0.132   
##    schoollife89_3                      0.608                  0.461   
##    class94_1         0.926                                    0.216   
##    class95_2         0.767                                    0.340   
##    class96_3         0.788                                    0.311   
##    class97_4         0.731                                    0.344   
##    class98_5         0.717                                    0.409   
##    class99_6         0.729                                    0.360   
##  ──────────────────────────────────────────────────────────────────── 
##    Note. 'oblimin' rotation was used
# Reliability analysis (Cronbach's alpha and McDonald's omega).
# Computes the scale-level reliability coefficients for the items listed in
# `vars`.
# The call is namespace-qualified because psych also exports a function named
# reliability(); without the prefix, which one runs depends on the order of
# the library() calls.
a <- jmv::reliability(
  data = edu_kmo,
  vars = c("pride85_1", "pride86_2"),
  omegaScale = TRUE
)
a
## 
##  RELIABILITY ANALYSIS
## 
##  Scale Reliability Statistics              
##  ───────────────────────────────────────── 
##             Cronbach's α    McDonald's ω   
##  ───────────────────────────────────────── 
##    scale           0.821           0.822   
##  ─────────────────────────────────────────

결과 도표