I Kết nối dữ liệu

library(foreign)
# Load the survey responses from the Stata file. The path is built explicitly
# instead of calling setwd(), so the session's working directory is not
# changed as a side effect of reading the data.
dulieu <- read.dta(file.path("c:/vidu", "EFA.dta"))
head(dulieu)
##   SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3
## 1    4    1    4    3    3    3    2    3    2    2    3    2    4    4    5
## 2    5    5    4    5    1    2    3    5    5    5    5    5    2    2    2
## 3    5    5    5    5    4    4    5    4    4    4    3    4    4    4    5
## 4    4    4    5    4    3    4    3    3    3    4    3    4    5    5    5
## 5    3    3    3    4    4    4    3    4    5    5    5    5    3    5    3
## 6    2    2    2    2    5    5    2    5    3    4    3    3    4    4    3
##   PAM4 TAD1 TAD2 TAD3 TAD4
## 1    4    5    5    5    5
## 2    2    4    4    4    4
## 3    4    3    4    3    3
## 4    5    3    4    2    5
## 5    3    2    2    3    2
## 6    4    4    4    3    4
library(tidyverse)
## -- Attaching packages ------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Keep columns 1-20 (the item responses) as the analysis data set.
dulieu2 <- select(dulieu, 1:20)
head(dulieu2)
##   SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3
## 1    4    1    4    3    3    3    2    3    2    2    3    2    4    4    5
## 2    5    5    4    5    1    2    3    5    5    5    5    5    2    2    2
## 3    5    5    5    5    4    4    5    4    4    4    3    4    4    4    5
## 4    4    4    5    4    3    4    3    3    3    4    3    4    5    5    5
## 5    3    3    3    4    4    4    3    4    5    5    5    5    3    5    3
## 6    2    2    2    2    5    5    2    5    3    4    3    3    4    4    3
##   PAM4 TAD1 TAD2 TAD3 TAD4
## 1    4    5    5    5    5
## 2    2    4    4    4    4
## 3    4    3    4    3    3
## 4    5    3    4    2    5
## 5    3    2    2    3    2
## 6    4    4    4    3    4

II Biểu đồ tương quan biến

# Correlation matrix of all items in dulieu2.
corrEFA <- dulieu2 %>% cor()
# NOTE(review): plot() on a numeric matrix presumably draws only its first two
# columns against each other, not a chart of the whole correlation matrix --
# confirm this is the intended figure.
plot(corrEFA)

library(corrplot)
## corrplot 0.84 loaded
# Draw the full correlation matrix using the "color" method.
corrplot(corr = corrEFA, method = "color")

III Phân tích tần suất của biến

library(likert)
## Loading required package: xtable
## 
## Attaching package: 'likert'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Turn every column into a factor, then build the likert object. The object is
# stored under the same name, so `dulieu` no longer holds the raw data frame
# after this point.
dulieu[] <- lapply(dulieu, as.factor)
dulieu <- likert(items = dulieu)
plot(dulieu, type = "heat")

# Second plot of the same likert object, with centered = FALSE.
plot(dulieu, centered = FALSE)

IV Kiểm tra Cronbach's Alpha

# Reliability check of the SAT scale: columns 1-4 of dulieu2 (SAT1-SAT4).
# The function is namespace-qualified because psych is only attached later in
# the script.
psych::alpha(dulieu2[,c(1,2,3,4)])
## 
## Reliability analysis   
## Call: psych::alpha(x = dulieu2[, c(1, 2, 3, 4)])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean sd median_r
##       0.87      0.87    0.87      0.62 6.6 0.019  3.9  1     0.59
## 
##  lower alpha upper     95% confidence boundaries
## 0.83 0.87 0.9 
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se   var.r med.r
## SAT1      0.78      0.78    0.71      0.54 3.5    0.031 0.00258  0.55
## SAT2      0.84      0.85    0.84      0.65 5.6    0.024 0.04022  0.59
## SAT3      0.82      0.82    0.76      0.61 4.6    0.025 0.00089  0.59
## SAT4      0.87      0.87    0.86      0.69 6.6    0.020 0.02791  0.64
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean  sd
## SAT1 150  0.91  0.92  0.93   0.84  3.9 1.1
## SAT2 150  0.82  0.82  0.71   0.68  3.9 1.2
## SAT3 150  0.86  0.86  0.85   0.73  3.8 1.2
## SAT4 150  0.79  0.79  0.66   0.62  4.0 1.2
## 
## Non missing response frequency for each item
##         1    2    3    4    5 miss
## SAT1 0.05 0.05 0.24 0.31 0.35    0
## SAT2 0.08 0.07 0.07 0.39 0.38    0
## SAT3 0.05 0.06 0.37 0.10 0.43    0
## SAT4 0.08 0.07 0.07 0.37 0.41    0
# Reliability check of the RAS scale: columns 5-8 of dulieu2 (RAS1-RAS4).
psych::alpha(dulieu2[,5:8])
## 
## Reliability analysis   
## Call: psych::alpha(x = dulieu2[, 5:8])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.83      0.83    0.83      0.55   5 0.022  3.8 0.99     0.57
## 
##  lower alpha upper     95% confidence boundaries
## 0.79 0.83 0.88 
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## RAS1      0.70      0.70    0.61      0.44 2.3    0.043 0.0045  0.41
## RAS2      0.81      0.81    0.78      0.59 4.3    0.027 0.0258  0.62
## RAS3      0.82      0.82    0.76      0.60 4.5    0.026 0.0061  0.62
## RAS4      0.81      0.81    0.80      0.59 4.4    0.027 0.0331  0.66
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean  sd
## RAS1 150  0.93  0.92  0.93   0.85  3.6 1.2
## RAS2 150  0.79  0.79  0.69   0.61  3.9 1.3
## RAS3 150  0.77  0.77  0.70   0.59  3.6 1.2
## RAS4 150  0.77  0.78  0.66   0.60  4.0 1.2
## 
## Non missing response frequency for each item
##         1    2    3    4    5 miss
## RAS1 0.09 0.07 0.29 0.25 0.29    0
## RAS2 0.09 0.07 0.15 0.31 0.39    0
## RAS3 0.04 0.13 0.39 0.07 0.37    0
## RAS4 0.06 0.07 0.11 0.35 0.41    0
# Reliability check of the COM scale. The items are selected by name prefix
# instead of by column position, so the check keeps working if the columns of
# dulieu2 are reordered.
dulieu2 %>%
  select(starts_with("COM")) %>%
  psych::alpha()
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##        0.9       0.9     0.9       0.7 9.4 0.013  3.8 0.98     0.69
## 
##  lower alpha upper     95% confidence boundaries
## 0.88 0.9 0.93 
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se   var.r med.r
## COM1      0.84      0.84    0.78      0.64 5.4    0.022 0.00172  0.63
## COM2      0.89      0.89    0.88      0.73 8.2    0.016 0.01852  0.71
## COM3      0.87      0.87    0.82      0.70 6.8    0.018 0.00011  0.69
## COM4      0.89      0.89    0.87      0.73 8.2    0.016 0.01764  0.69
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean  sd
## COM1 150  0.93  0.93  0.93   0.87  3.7 1.1
## COM2 150  0.85  0.85  0.77   0.74  3.9 1.1
## COM3 150  0.89  0.89  0.87   0.79  3.7 1.1
## COM4 150  0.85  0.85  0.78   0.74  4.0 1.1
## 
## Non missing response frequency for each item
##         1    2    3    4    5 miss
## COM1 0.03 0.09 0.33 0.25 0.30    0
## COM2 0.03 0.13 0.11 0.35 0.37    0
## COM3 0.03 0.08 0.45 0.07 0.37    0
## COM4 0.03 0.12 0.11 0.34 0.40    0
# Reliability check of the PAM scale. The items are selected by name prefix
# instead of by column position, so the check keeps working if the columns of
# dulieu2 are reordered.
dulieu2 %>%
  select(starts_with("PAM")) %>%
  psych::alpha()
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.86      0.86    0.84      0.61 6.2 0.019  3.8 0.92     0.62
## 
##  lower alpha upper     95% confidence boundaries
## 0.82 0.86 0.9 
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## PAM1      0.77      0.77    0.69      0.52 3.3    0.033 0.0027  0.54
## PAM2      0.83      0.83    0.80      0.62 4.9    0.025 0.0201  0.67
## PAM3      0.84      0.84    0.78      0.64 5.2    0.023 0.0037  0.67
## PAM4      0.85      0.85    0.81      0.65 5.6    0.022 0.0104  0.68
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean  sd
## PAM1 150  0.91  0.92  0.91   0.84  3.7 1.1
## PAM2 150  0.83  0.83  0.73   0.68  3.9 1.1
## PAM3 150  0.82  0.82  0.74   0.67  3.7 1.2
## PAM4 150  0.80  0.80  0.70   0.64  4.0 1.1
## 
## Non missing response frequency for each item
##         1    2    3    4    5 miss
## PAM1 0.02 0.11 0.31 0.28 0.27    0
## PAM2 0.02 0.15 0.09 0.40 0.34    0
## PAM3 0.01 0.14 0.39 0.09 0.37    0
## PAM4 0.01 0.15 0.09 0.33 0.43    0
# Reliability check of the TAD scale. The items are selected by name prefix
# instead of by column position, so the check keeps working if the columns of
# dulieu2 are reordered.
dulieu2 %>%
  select(starts_with("TAD")) %>%
  psych::alpha()
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.86      0.86    0.86      0.62 6.4 0.018  3.8 0.93      0.6
## 
##  lower alpha upper     95% confidence boundaries
## 0.83 0.86 0.9 
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## TAD1      0.76      0.76    0.68      0.52 3.2    0.034 0.0014  0.52
## TAD2      0.85      0.85    0.84      0.65 5.6    0.022 0.0309  0.65
## TAD3      0.83      0.83    0.77      0.62 4.9    0.024 0.0037  0.65
## TAD4      0.86      0.86    0.84      0.67 6.1    0.021 0.0236  0.66
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean  sd
## TAD1 150  0.93  0.93  0.94   0.87  3.7 1.1
## TAD2 150  0.81  0.81  0.70   0.66  4.0 1.1
## TAD3 150  0.85  0.84  0.81   0.71  3.6 1.2
## TAD4 150  0.79  0.79  0.68   0.63  4.0 1.1
## 
## Non missing response frequency for each item
##         1    2    3    4    5 miss
## TAD1 0.04 0.09 0.30 0.31 0.27    0
## TAD2 0.03 0.12 0.05 0.45 0.35    0
## TAD3 0.03 0.14 0.35 0.15 0.32    0
## TAD4 0.02 0.11 0.12 0.39 0.36    0

V Kiểm định cần thiết

library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Kaiser-Meyer-Olkin sampling adequacy for the 20 items.
KMO(r = dulieu2)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = dulieu2)
## Overall MSA =  0.7
## MSA for each item = 
## SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3 PAM4 
## 0.65 0.80 0.67 0.79 0.62 0.66 0.60 0.73 0.66 0.79 0.68 0.81 0.68 0.84 0.68 0.77 
## TAD1 TAD2 TAD3 TAD4 
## 0.63 0.79 0.62 0.73
# Bartlett's test on the items. Raw data (not a correlation matrix) is passed,
# which is why the function reports that it computes R from the data.
cortest.bartlett(R = dulieu2)
## R was not square, finding R from data
## $chisq
## [1] 1834.681
## 
## $p.value
## [1] 1.237199e-266
## 
## $df
## [1] 190

VI Phân tích nhân tố

library(GPArotation)
# PCA on the standardised items. The argument is spelled out as `scale.`;
# the previous `scale = TRUE` only worked through partial argument matching.
pca <- prcomp(dulieu2, scale. = TRUE)
pca
## Standard deviations (1, .., p=20):
##  [1] 1.8924523 1.8001221 1.6842887 1.6140053 1.4964414 0.8763247 0.8388667
##  [8] 0.7794366 0.7053428 0.6836881 0.6654456 0.6491739 0.6078680 0.5660541
## [15] 0.4976879 0.4451576 0.3995541 0.3297970 0.3136155 0.2934569
## 
## Rotation (n x k) = (20 x 20):
##              PC1        PC2         PC3          PC4         PC5         PC6
## SAT1 -0.28710124  0.2220401 -0.21637375  0.024205663 -0.35355132  0.12026415
## SAT2 -0.23155529  0.2649554 -0.10770054 -0.051243731 -0.31962761 -0.11172325
## SAT3 -0.29573856  0.2039781 -0.21361032 -0.003950633 -0.29063727  0.13615981
## SAT4 -0.24750828  0.1858605 -0.17850663 -0.071340662 -0.27302013 -0.25146895
## RAS1  0.12597519 -0.3420626  0.11246452  0.074303983 -0.41350372  0.13446443
## RAS2  0.08939154 -0.3192606  0.10694571  0.030753821 -0.31553673 -0.32414827
## RAS3  0.08574749 -0.2754768  0.06803262  0.029086795 -0.37850473  0.55932439
## RAS4  0.14798485 -0.2889662  0.02119947  0.085582936 -0.33076420 -0.38313422
## COM1  0.32668648  0.2257259 -0.14891414  0.299284931 -0.11827861  0.10857508
## COM2  0.32681502  0.2147279 -0.10268569  0.241981119 -0.06177128 -0.19328309
## COM3  0.33461628  0.2124067 -0.12851155  0.266401120 -0.07990923  0.02266031
## COM4  0.28785031  0.2281613 -0.10283503  0.302923126 -0.01299116  0.01394303
## PAM1 -0.26902725 -0.1497459 -0.02455888  0.433908471  0.09860167  0.03874956
## PAM2 -0.24167616 -0.1155474 -0.01341206  0.389347239  0.13486790 -0.01542083
## PAM3 -0.20694002 -0.1172711  0.01356218  0.420501221  0.08363126  0.27537116
## PAM4 -0.26613558 -0.1283162 -0.01444170  0.351076977  0.06038844 -0.32374818
## TAD1 -0.06600491  0.1947318  0.49813986  0.092564915 -0.08339359 -0.11488358
## TAD2 -0.04366643  0.2481526  0.40538651 -0.011184740 -0.04234324  0.18458173
## TAD3 -0.03273260  0.1739609  0.44532009  0.087851605 -0.11589642 -0.15206789
## TAD4 -0.04463748  0.1898232  0.40391636  0.099156338 -0.07428536  0.07655294
##              PC7         PC8          PC9        PC10         PC11         PC12
## SAT1 -0.13803240  0.13236255 -0.211350316  0.05119322 -0.141304042  0.052366492
## SAT2  0.24242199  0.04676304  0.396913136 -0.08288688  0.004737086 -0.005075803
## SAT3 -0.29644195  0.30060183 -0.295200273 -0.02552462 -0.067764761  0.066857734
## SAT4  0.28233444 -0.50855533  0.144786823  0.10034154  0.106408430 -0.175974557
## RAS1 -0.01244359 -0.04945080  0.032536927 -0.07503037 -0.002051218  0.026458100
## RAS2  0.34853668  0.14506299 -0.170901472 -0.48543268 -0.344351659 -0.026463047
## RAS3 -0.15935276 -0.07071634  0.363716686 -0.04460158  0.215256990 -0.047112476
## RAS4 -0.10749216 -0.02246508 -0.156463901  0.66656640  0.033260066  0.207794844
## COM1  0.03331994 -0.18910677 -0.141605234 -0.20380026  0.086966152  0.024153378
## COM2 -0.05139932  0.31704926  0.281889010  0.07591575  0.036439923 -0.225994918
## COM3 -0.01223960 -0.21807928 -0.414327477 -0.15546413  0.127746346  0.077714327
## COM4  0.04950189  0.07165789  0.355099824  0.14762421 -0.253816790  0.143366470
## PAM1  0.13565006  0.01207459  0.007292349 -0.11322547  0.002141764 -0.115885895
## PAM2 -0.23346440 -0.09822032  0.203987127 -0.01373062 -0.533409410  0.263685994
## PAM3  0.33870420 -0.07390963 -0.222368203  0.32439624  0.007344842 -0.376578449
## PAM4 -0.17257916  0.03804639  0.100867424 -0.22869074  0.609558605  0.302269121
## TAD1 -0.11209634 -0.07994565 -0.028463136  0.05360291 -0.086025364 -0.210982028
## TAD2  0.20646880 -0.28636638 -0.033515554  0.03015811 -0.053838449  0.579195642
## TAD3 -0.45733151 -0.16151326 -0.004670854 -0.12053629 -0.028151665 -0.362227687
## TAD4  0.32847748  0.53117558 -0.046154058  0.10050940  0.202981477  0.097047050
##              PC13        PC14        PC15          PC16         PC17
## SAT1 -0.106937458  0.07298397 -0.05190413 -0.0428869143 -0.116943948
## SAT2  0.651535294  0.08743057 -0.21153503  0.1805961019 -0.023316939
## SAT3 -0.148228430  0.11421886  0.10780359 -0.0102949642  0.061409406
## SAT4 -0.411605172 -0.30038241  0.16894279 -0.0908959152  0.112862310
## RAS1 -0.091565429  0.02007611  0.08395996  0.5601985919  0.442751062
## RAS2 -0.127018359  0.08961031 -0.01081394 -0.1263012604 -0.265495857
## RAS3 -0.010174060 -0.10503915  0.01254775 -0.3522564974 -0.241273076
## RAS4  0.204237084  0.02383063 -0.06008368 -0.1911121335 -0.008069188
## COM1  0.096240679 -0.09844933 -0.05215839 -0.0761941306 -0.177571810
## COM2 -0.005860717  0.13426761  0.67447632  0.0356959797 -0.079468304
## COM3  0.228198968 -0.23042167 -0.05291890  0.0006315171  0.139309181
## COM4 -0.421140834  0.22337988 -0.51072030  0.0117390061  0.104067720
## PAM1  0.139766011  0.24285326  0.06217200 -0.4991915523  0.565010323
## PAM2  0.110254877 -0.48683096  0.16539940  0.0959878798 -0.106095931
## PAM3  0.002406408  0.15632352 -0.01830424  0.3332414202 -0.323712448
## PAM4 -0.164832058  0.09840823 -0.09186917  0.1485629835 -0.225229222
## TAD1 -0.002289379  0.03855802 -0.09378872 -0.2170390291 -0.215068228
## TAD2 -0.011251143  0.36301871  0.33941012  0.0111343426 -0.024186635
## TAD3  0.015777835  0.05439403 -0.11040682  0.1424424975  0.144700065
## TAD4 -0.091452916 -0.51640624 -0.06328452 -0.0230290039  0.150348098
##              PC18          PC19          PC20
## SAT1 -0.054459005  0.7273736810 -0.0884545839
## SAT2  0.011197099 -0.1036900085 -0.0311900481
## SAT3  0.015314739 -0.6255111314  0.0718754991
## SAT4  0.038235267 -0.0845562161 -0.0005973106
## RAS1 -0.347286256  0.0530967885  0.0279437323
## RAS2  0.176844013 -0.0479145565 -0.0460567126
## RAS3  0.155954388 -0.0580163567 -0.1437645205
## RAS4  0.091766817 -0.0327385208  0.1162748537
## COM1 -0.180013452  0.0197525181  0.7116083916
## COM2  0.004692165  0.0702654126 -0.1024362855
## COM3  0.098480481 -0.0636732827 -0.5843789260
## COM4  0.064142611 -0.0871989536 -0.1056113833
## PAM1 -0.035357353  0.0682858872  0.0593579661
## PAM2  0.003570740 -0.0321532082 -0.0009987273
## PAM3  0.133678353 -0.0850444891 -0.0338485161
## PAM4 -0.039694851  0.0247607488 -0.0621995268
## TAD1 -0.671646978 -0.1146167357 -0.1959827122
## TAD2  0.152484456 -0.0004124992  0.0091510350
## TAD3  0.507599275  0.0800078304  0.1536464024
## TAD4  0.107803919  0.0462924856  0.0936756735
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Variance extracted: eigenvalue, percent and cumulative percent per dimension.
u <- pca %>% get_eig()
u
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1  3.58137553       17.9068776                    17.90688
## Dim.2  3.24043969       16.2021985                    34.10908
## Dim.3  2.83682835       14.1841418                    48.29322
## Dim.4  2.60501303       13.0250652                    61.31828
## Dim.5  2.23933677       11.1966839                    72.51497
## Dim.6  0.76794496        3.8397248                    76.35469
## Dim.7  0.70369734        3.5184867                    79.87318
## Dim.8  0.60752145        3.0376072                    82.91079
## Dim.9  0.49750846        2.4875423                    85.39833
## Dim.10 0.46742945        2.3371473                    87.73548
## Dim.11 0.44281782        2.2140891                    89.94956
## Dim.12 0.42142676        2.1071338                    92.05670
## Dim.13 0.36950355        1.8475177                    93.90422
## Dim.14 0.32041723        1.6020861                    95.50630
## Dim.15 0.24769321        1.2384660                    96.74477
## Dim.16 0.19816526        0.9908263                    97.73559
## Dim.17 0.15964347        0.7982173                    98.53381
## Dim.18 0.10876604        0.5438302                    99.07764
## Dim.19 0.09835470        0.4917735                    99.56942
## Dim.20 0.08611694        0.4305847                   100.00000
# Add the component index as a column. row_number() follows the actual number
# of rows in `u` instead of assuming exactly 20 components.
y <- u %>% mutate(NhanTo = row_number())
y
##    eigenvalue variance.percent cumulative.variance.percent NhanTo
## 1  3.58137553       17.9068776                    17.90688      1
## 2  3.24043969       16.2021985                    34.10908      2
## 3  2.83682835       14.1841418                    48.29322      3
## 4  2.60501303       13.0250652                    61.31828      4
## 5  2.23933677       11.1966839                    72.51497      5
## 6  0.76794496        3.8397248                    76.35469      6
## 7  0.70369734        3.5184867                    79.87318      7
## 8  0.60752145        3.0376072                    82.91079      8
## 9  0.49750846        2.4875423                    85.39833      9
## 10 0.46742945        2.3371473                    87.73548     10
## 11 0.44281782        2.2140891                    89.94956     11
## 12 0.42142676        2.1071338                    92.05670     12
## 13 0.36950355        1.8475177                    93.90422     13
## 14 0.32041723        1.6020861                    95.50630     14
## 15 0.24769321        1.2384660                    96.74477     15
## 16 0.19816526        0.9908263                    97.73559     16
## 17 0.15964347        0.7982173                    98.53381     17
## 18 0.10876604        0.5438302                    99.07764     18
## 19 0.09835470        0.4917735                    99.56942     19
## 20 0.08611694        0.4305847                   100.00000     20
# Scree plot: eigenvalue against component index, line plus red points.
ggplot(y, aes(x = NhanTo, y = eigenvalue)) +
  geom_line() +
  geom_point(color = "red", size = 3)

# Choose the number of components by percentage of explained variance.
# `ncp` is written in full; the previous `n = 20` relied on partial matching.
fviz_screeplot(pca, addlabels = TRUE, ncp = 20)

# Choose the number of components by eigenvalue.
# `ncp` is written in full; the previous `n = 20` relied on partial matching.
fviz_screeplot(pca, addlabels = TRUE, ncp = 20, choice = "eigenvalue")

VII Ma trận xoay

# As found above, five components are retained; rotate them with varimax.
xoay <- principal(
  r = dulieu2,
  nfactors = 5,
  rotate = "varimax"
)
xoay
## Principal Components Analysis
## Call: principal(r = dulieu2, nfactors = 5, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##        RC1   RC5   RC3   RC4   RC2   h2   u2 com
## SAT1  0.01  0.93 -0.02  0.11 -0.04 0.87 0.13 1.0
## SAT2  0.00  0.81  0.14 -0.06 -0.08 0.69 0.31 1.1
## SAT3 -0.05  0.86 -0.05  0.10 -0.09 0.77 0.23 1.1
## SAT4 -0.09  0.77 -0.04 -0.02 -0.07 0.60 0.40 1.1
## RAS1 -0.01 -0.09  0.01  0.02  0.93 0.87 0.13 1.0
## RAS2 -0.10 -0.12 -0.01  0.01  0.77 0.62 0.38 1.1
## RAS3 -0.04  0.01 -0.02 -0.02  0.78 0.61 0.39 1.0
## RAS4  0.09 -0.09 -0.11  0.01  0.77 0.61 0.39 1.1
## COM1  0.93  0.03 -0.02 -0.05  0.03 0.87 0.13 1.0
## COM2  0.84 -0.06  0.01 -0.11 -0.01 0.72 0.28 1.0
## COM3  0.89 -0.04 -0.02 -0.08  0.01 0.79 0.21 1.0
## COM4  0.85 -0.06  0.03  0.02 -0.09 0.73 0.27 1.0
## PAM1 -0.06  0.04 -0.03  0.92  0.02 0.85 0.15 1.0
## PAM2 -0.05  0.00 -0.01  0.83 -0.05 0.69 0.31 1.0
## PAM3  0.01 -0.01  0.04  0.82  0.03 0.67 0.33 1.0
## PAM4 -0.11  0.09 -0.01  0.78  0.02 0.64 0.36 1.1
## TAD1 -0.03 -0.01  0.94  0.05 -0.01 0.88 0.12 1.0
## TAD2 -0.02  0.03  0.80 -0.12 -0.14 0.68 0.32 1.1
## TAD3  0.02  0.00  0.84  0.02  0.05 0.71 0.29 1.0
## TAD4  0.04  0.01  0.79  0.05 -0.03 0.62 0.38 1.0
## 
##                        RC1  RC5  RC3  RC4  RC2
## SS loadings           3.14 2.89 2.89 2.88 2.71
## Proportion Var        0.16 0.14 0.14 0.14 0.14
## Cumulative Var        0.16 0.30 0.45 0.59 0.73
## Proportion Explained  0.22 0.20 0.20 0.20 0.19
## Cumulative Proportion 0.22 0.42 0.61 0.81 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 5 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.05 
##  with the empirical chi square  148.45  with prob <  0.0012 
## 
## Fit based upon off diagonal values = 0.96
# Print the rotated solution with a loading cut-off of 0.55 and items sorted
# by component. The print() generic is used so that S3 dispatch picks the
# psych method, instead of calling the method function by name.
print(xoay, cut = 0.55, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = dulieu2, nfactors = 5, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      item   RC1   RC5   RC3   RC4   RC2   h2   u2 com
## COM1    9  0.93                         0.87 0.13 1.0
## COM3   11  0.89                         0.79 0.21 1.0
## COM4   12  0.85                         0.73 0.27 1.0
## COM2   10  0.84                         0.72 0.28 1.0
## SAT1    1        0.93                   0.87 0.13 1.0
## SAT3    3        0.86                   0.77 0.23 1.1
## SAT2    2        0.81                   0.69 0.31 1.1
## SAT4    4        0.77                   0.60 0.40 1.1
## TAD1   17              0.94             0.88 0.12 1.0
## TAD3   19              0.84             0.71 0.29 1.0
## TAD2   18              0.80             0.68 0.32 1.1
## TAD4   20              0.79             0.62 0.38 1.0
## PAM1   13                    0.92       0.85 0.15 1.0
## PAM2   14                    0.83       0.69 0.31 1.0
## PAM3   15                    0.82       0.67 0.33 1.0
## PAM4   16                    0.78       0.64 0.36 1.1
## RAS1    5                          0.93 0.87 0.13 1.0
## RAS3    7                          0.78 0.61 0.39 1.0
## RAS2    6                          0.77 0.62 0.38 1.1
## RAS4    8                          0.77 0.61 0.39 1.1
## 
##                        RC1  RC5  RC3  RC4  RC2
## SS loadings           3.14 2.89 2.89 2.88 2.71
## Proportion Var        0.16 0.14 0.14 0.14 0.14
## Cumulative Var        0.16 0.30 0.45 0.59 0.73
## Proportion Explained  0.22 0.20 0.20 0.20 0.19
## Cumulative Proportion 0.22 0.42 0.61 0.81 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 5 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.05 
##  with the empirical chi square  148.45  with prob <  0.0012 
## 
## Fit based upon off diagonal values = 0.96

VIII Ước lượng nhân tố

# Component scores of every respondent from the prcomp fit `pca`.
uocluong <- pca %>% predict(newdata = dulieu2)
head(uocluong)
##         PC1        PC2        PC3        PC4        PC5        PC6        PC7
## 1 -2.027451 -0.3087310  2.7204660 -0.6181991  2.2289661  0.6461760 -0.6027724
## 2  1.890772  3.6476452 -1.0269968 -1.5938388 -0.7063449 -0.4935091 -0.3686621
## 3 -1.184711 -0.2423145 -1.4049121  0.3801400 -1.4650525  1.0210511  0.3076885
## 4 -1.979745 -0.2035598 -0.7188705  1.1974250  0.9253456  0.4146206  1.1024258
## 5  2.544398 -0.6033350 -2.5742686  0.4079847  0.7733712 -0.5022852 -0.6598850
## 6  1.308336 -2.4337218  1.5070800 -0.3854037  1.5846968 -1.2309604  0.2446514
##           PC8         PC9         PC10        PC11       PC12        PC13
## 1  0.02571746 -2.37086833  0.396807339 -0.08423920 -0.3908935 -0.87590789
## 2 -0.29426217 -0.21006244  1.466026170  0.15206048 -0.3240190  0.53364945
## 3 -0.38076150  0.43922187  0.242283258 -0.15303352 -0.3368169 -0.07553634
## 4  1.33069726  0.03238643 -0.108806392 -0.08818959  0.5196258 -0.45926979
## 5 -0.79243004  0.25815926 -0.400087932 -1.19454824 -0.4670095 -0.01799287
## 6  0.54106156 -0.59200419  0.004466051 -0.40110941  0.4921677  0.21579935
##          PC14       PC15         PC16        PC17         PC18       PC19
## 1 -0.07206824  0.2917226  0.243580891  0.24054891 -0.003806319  0.3772994
## 2 -0.17161063 -0.4803949 -1.137895332 -0.32114601  0.338607503  0.4002341
## 3  0.66940820  0.3096231  0.096058982 -0.55779209  0.198253694 -0.1396824
## 4 -0.13890899  0.5031072  0.009476989  0.07560331  0.024599057 -0.5067870
## 5 -0.80351038  0.1435355  0.564288322  0.23315915  0.113136360 -0.0946332
## 6  0.24336639  0.6551518  0.330943041  0.69359086 -0.895532238  0.1777442
##         PC20
## 1 -0.2147253
## 2  0.1286330
## 3  0.2659435
## 4 -0.1363060
## 5  0.1781751
## 6  0.2410815
# Show the first rows of the raw items for comparison with the scores.
dulieu2 %>% head()
##   SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3
## 1    4    1    4    3    3    3    2    3    2    2    3    2    4    4    5
## 2    5    5    4    5    1    2    3    5    5    5    5    5    2    2    2
## 3    5    5    5    5    4    4    5    4    4    4    3    4    4    4    5
## 4    4    4    5    4    3    4    3    3    3    4    3    4    5    5    5
## 5    3    3    3    4    4    4    3    4    5    5    5    5    3    5    3
## 6    2    2    2    2    5    5    2    5    3    4    3    3    4    4    3
##   PAM4 TAD1 TAD2 TAD3 TAD4
## 1    4    5    5    5    5
## 2    2    4    4    4    4
## 3    4    3    4    3    3
## 4    5    3    4    2    5
## 5    3    2    2    3    2
## 6    4    4    4    3    4
# Keep the first five score columns and label them with construct names.
# NOTE(review): these columns are the unrotated prcomp scores PC1-PC5, while
# the labels follow the column order of the varimax solution `xoay`
# (RC1, RC5, RC3, RC4, RC2) -- confirm that this labelling is intended.
uocluong2 <- data.frame(uocluong)
uocluong3 <- setNames(
  select(uocluong2, 1:5),
  c("COM", "SAT", "TAD", "PAM", "RAS")
)
head(uocluong3)
##         COM        SAT        TAD        PAM        RAS
## 1 -2.027451 -0.3087310  2.7204660 -0.6181991  2.2289661
## 2  1.890772  3.6476452 -1.0269968 -1.5938388 -0.7063449
## 3 -1.184711 -0.2423145 -1.4049121  0.3801400 -1.4650525
## 4 -1.979745 -0.2035598 -0.7188705  1.1974250  0.9253456
## 5  2.544398 -0.6033350 -2.5742686  0.4079847  0.7733712
## 6  1.308336 -2.4337218  1.5070800 -0.3854037  1.5846968

IX Hồi quy OLS

# OLS of the SAT score on the other four scores.
# NOTE(review): response and regressors are all scores from the same prcomp
# fit; presumably they are mutually uncorrelated, which would explain why the
# summary reports slopes and R-squared of about 0 -- confirm.
ols <- lm(formula = SAT ~ COM + TAD + PAM + RAS, data = uocluong3)
summary(ols)
## 
## Call:
## lm(formula = SAT ~ COM + TAD + PAM + RAS, data = uocluong3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3386 -1.3264 -0.1159  1.3118  4.7138 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  7.252e-17  1.490e-01       0        1
## COM          4.229e-16  7.899e-02       0        1
## TAD         -2.869e-16  8.876e-02       0        1
## PAM         -1.493e-16  9.262e-02       0        1
## RAS          5.349e-16  9.990e-02       0        1
## 
## Residual standard error: 1.825 on 145 degrees of freedom
## Multiple R-squared:  3.891e-31,  Adjusted R-squared:  -0.02759 
## F-statistic: 1.411e-29 on 4 and 145 DF,  p-value: 1

X Mở rộng

# Estimate the independent variables: every item except the SAT items.
# Columns are dropped by name prefix instead of by position, so the selection
# does not depend on the column order of dulieu2.
solieu1 <- dulieu2 %>% select(-starts_with("SAT"))
head(solieu1)
##   RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3 PAM4 TAD1 TAD2 TAD3
## 1    3    3    2    3    2    2    3    2    4    4    5    4    5    5    5
## 2    1    2    3    5    5    5    5    5    2    2    2    2    4    4    4
## 3    4    4    5    4    4    4    3    4    4    4    5    4    3    4    3
## 4    3    4    3    3    3    4    3    4    5    5    5    5    3    4    2
## 5    4    4    3    4    5    5    5    5    3    5    3    3    2    2    3
## 6    5    5    2    5    3    4    3    3    4    4    3    4    4    4    3
##   TAD4
## 1    5
## 2    4
## 3    3
## 4    5
## 5    2
## 6    4
# PCA on the standardised predictor items, then the score of every respondent.
# `scale.` is written in full; `scale = TRUE` relied on partial matching.
pca1 <- prcomp(solieu1, scale. = TRUE)
luu1 <- predict(pca1, newdata = solieu1)
head(luu1)
##          PC1        PC2        PC3        PC4        PC5        PC6         PC7
## 1  2.3874770  2.9659111  0.6881738  0.7468256  0.1748730  0.3138086 -0.98134786
## 2 -3.9265125  0.7218024  0.8523711  1.4195603 -0.5124710 -0.4013547  0.01655909
## 3  0.6295641 -1.1517060 -0.4303911 -0.3055716  1.1863367 -0.2801499  0.12953984
## 4  1.6223557  0.2643848 -1.8117744  0.4810545  0.5578136  1.5378236  0.68343481
## 5 -1.9466703 -2.6074964 -1.2601078  0.7970574 -0.5618247 -0.8465233 -0.30444719
## 6  0.9221323 -0.4334297  0.8428584 -0.2521046 -1.3132127  0.9796125 -0.43965052
##          PC8        PC9       PC10        PC11        PC12       PC13
## 1 -0.4792780  0.3740181 -0.8693007 -0.26035969  0.16468992  0.5760037
## 2 -1.3793282  0.6119236 -0.3493153 -0.17164864  0.15977475 -1.2328603
## 3 -0.2075910 -0.1759290 -0.1142793  0.77890427 -0.32750731  0.1116065
## 4  0.2458596 -0.1458688  0.5377063 -0.23796443 -0.32264723  0.1271438
## 5  0.1028150 -1.3416082 -0.4933328 -0.67464841  0.08182104  0.5958388
## 6 -0.2449230 -0.3465075  0.2416352 -0.08000454 -0.65632731  0.4375325
##          PC14        PC15       PC16
## 1  0.34331438  0.06470765  0.2775406
## 2 -0.34679390  0.35132641 -0.2098471
## 3 -0.57704395  0.20013724 -0.3038324
## 4  0.01277471  0.01392121  0.2017119
## 5  0.16943082  0.07061568 -0.1729256
## 6  0.76361391 -0.88491960 -0.1716738
# Keep the first four score columns and label them with construct names.
# NOTE(review): these are the unrotated components PC1-PC4 of `pca1`; nothing
# in this script ties each of them to the construct it is named after --
# confirm the labelling.
luu1 <- luu1 %>%
  data.frame() %>%
  select(1:4) %>%
  setNames(c("COM", "TAD", "PAM", "RAS"))

# Estimate the dependent variable from the SAT items only. The items are
# selected by name prefix instead of by position, so the selection does not
# depend on the column order of dulieu2.
solieu2 <- dulieu2 %>% select(starts_with("SAT"))
head(solieu2)
##   SAT1 SAT2 SAT3 SAT4
## 1    4    1    4    3
## 2    5    5    4    5
## 3    5    5    5    5
## 4    4    4    5    4
## 5    3    3    3    4
## 6    2    2    2    2
# PCA on the standardised SAT items, then the score of every respondent.
# `scale.` is written in full; `scale = TRUE` relied on partial matching.
pca2 <- prcomp(solieu2, scale. = TRUE)
luu2 <- predict(pca2, newdata = solieu2)
head(luu2)
##          PC1         PC2         PC3         PC4
## 1  1.3591519  1.55684951 -1.45729207  0.27280244
## 2 -1.4704884 -0.44136583  0.08034238  0.51644106
## 3 -1.9007400  0.03018929 -0.01381734 -0.01880041
## 4 -0.6326041  0.56343499 -0.09187708 -0.55465713
## 5  1.1209501 -0.38297761 -0.47383882 -0.10463499
## 6  3.1944226  0.21526101  0.03448260 -0.02064618
# Keep only the first component of the SAT items and name it SAT.
luu2 <- luu2 %>%
  data.frame() %>%
  select(1) %>%
  setNames("SAT")

# Combine the predictor scores and the SAT score into one data frame.
luu <- cbind(luu1, luu2)
luu %>% head()
##          COM        TAD        PAM        RAS        SAT
## 1  2.3874770  2.9659111  0.6881738  0.7468256  1.3591519
## 2 -3.9265125  0.7218024  0.8523711  1.4195603 -1.4704884
## 3  0.6295641 -1.1517060 -0.4303911 -0.3055716 -1.9007400
## 4  1.6223557  0.2643848 -1.8117744  0.4810545 -0.6326041
## 5 -1.9466703 -2.6074964 -1.2601078  0.7970574  1.1209501
## 6  0.9221323 -0.4334297  0.8428584 -0.2521046  3.1944226
# OLS of the SAT score on the four predictor scores from the separate PCA.
ols <- lm(SAT ~ COM + TAD + PAM + RAS, data = luu)
summary(ols)
## 
## Call:
## lm(formula = SAT ~ COM + TAD + PAM + RAS, data = luu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6964 -1.1821 -0.1110  0.5889  5.1407 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.290e-16  1.375e-01   0.000    1.000
## COM         -7.230e-02  7.480e-02  -0.967    0.335
## TAD         -1.086e-01  7.982e-02  -1.361    0.176
## PAM          8.869e-02  8.553e-02   1.037    0.301
## RAS         -1.282e-01  8.609e-02  -1.490    0.139
## 
## Residual standard error: 1.684 on 145 degrees of freedom
## Multiple R-squared:  0.04024,    Adjusted R-squared:  0.01377 
## F-statistic:  1.52 on 4 and 145 DF,  p-value: 0.1994