I Kết nối dữ liệu
library(foreign)
# Read the Stata survey file by explicit path instead of calling setwd(),
# so the session's working directory is not changed as a side effect.
dulieu <- read.dta(file.path("c:/vidu", "EFA.dta"))
# Preview the first rows to confirm the item columns were imported.
head(dulieu)
## SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3
## 1 4 1 4 3 3 3 2 3 2 2 3 2 4 4 5
## 2 5 5 4 5 1 2 3 5 5 5 5 5 2 2 2
## 3 5 5 5 5 4 4 5 4 4 4 3 4 4 4 5
## 4 4 4 5 4 3 4 3 3 3 4 3 4 5 5 5
## 5 3 3 3 4 4 4 3 4 5 5 5 5 3 5 3
## 6 2 2 2 2 5 5 2 5 3 4 3 3 4 4 3
## PAM4 TAD1 TAD2 TAD3 TAD4
## 1 4 5 5 5 5
## 2 2 4 4 4 4
## 3 4 3 4 3 3
## 4 5 3 4 2 5
## 5 3 2 2 3 2
## 6 4 4 4 3 4
library(tidyverse)
## -- Attaching packages ------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Working data set: the first 20 columns (SAT, RAS, COM, PAM and TAD items).
dulieu2 <- select(dulieu, 1:20)
# Preview the selected columns.
head(x = dulieu2)
## SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3
## 1 4 1 4 3 3 3 2 3 2 2 3 2 4 4 5
## 2 5 5 4 5 1 2 3 5 5 5 5 5 2 2 2
## 3 5 5 5 5 4 4 5 4 4 4 3 4 4 4 5
## 4 4 4 5 4 3 4 3 3 3 4 3 4 5 5 5
## 5 3 3 3 4 4 4 3 4 5 5 5 5 3 5 3
## 6 2 2 2 2 5 5 2 5 3 4 3 3 4 4 3
## PAM4 TAD1 TAD2 TAD3 TAD4
## 1 4 5 5 5 5
## 2 2 4 4 4 4
## 3 4 3 4 3 3
## 4 5 3 4 2 5
## 5 3 2 2 3 2
## 6 4 4 4 3 4
II Biểu đồ tương quan biến
# Pairwise correlation matrix of all items (cor() with its default settings).
corrEFA <- dulieu2 %>% cor()
# NOTE(review): plot() on a numeric matrix appears to draw only its first two
# columns against each other, while the corrplot() call that follows shows the
# whole matrix -- confirm this base plot is still wanted.
plot(x = corrEFA)

library(corrplot)
## corrplot 0.84 loaded
# Colour-coded display of the item correlation matrix.
corrplot(corr = corrEFA, method = "color")

III Phân tích tần suất của biến
library(likert)
## Loading required package: xtable
##
## Attaching package: 'likert'
## The following object is masked from 'package:dplyr':
##
## recode
# Convert every item to a factor using ONE shared, sorted set of response
# levels taken from the whole data set. as.factor() would derive the levels
# column by column, so an item in which some response category never occurs
# would get fewer levels than the others, and likert() requires all items to
# have the same levels.
muc_tra_loi <- sort(unique(unlist(dulieu, use.names = FALSE)))
dulieu[] <- lapply(dulieu, factor, levels = muc_tra_loi)
# Build the likert summary object (overwrites `dulieu`, as before).
dulieu <- likert(dulieu)
# Heat-map view of the response distribution per item.
plot(dulieu, type = "heat")

# Bar-chart view with centring switched off.
plot(dulieu, centered = FALSE)

IV Kiểm tra Cronbach's Alpha
# Reliability check (psych::alpha) for the SAT scale: columns 1-4 of dulieu2.
psych::alpha(dulieu2[,c(1,2,3,4)])
##
## Reliability analysis
## Call: psych::alpha(x = dulieu2[, c(1, 2, 3, 4)])
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.87 0.87 0.87 0.62 6.6 0.019 3.9 1 0.59
##
## lower alpha upper 95% confidence boundaries
## 0.83 0.87 0.9
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SAT1 0.78 0.78 0.71 0.54 3.5 0.031 0.00258 0.55
## SAT2 0.84 0.85 0.84 0.65 5.6 0.024 0.04022 0.59
## SAT3 0.82 0.82 0.76 0.61 4.6 0.025 0.00089 0.59
## SAT4 0.87 0.87 0.86 0.69 6.6 0.020 0.02791 0.64
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SAT1 150 0.91 0.92 0.93 0.84 3.9 1.1
## SAT2 150 0.82 0.82 0.71 0.68 3.9 1.2
## SAT3 150 0.86 0.86 0.85 0.73 3.8 1.2
## SAT4 150 0.79 0.79 0.66 0.62 4.0 1.2
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## SAT1 0.05 0.05 0.24 0.31 0.35 0
## SAT2 0.08 0.07 0.07 0.39 0.38 0
## SAT3 0.05 0.06 0.37 0.10 0.43 0
## SAT4 0.08 0.07 0.07 0.37 0.41 0
# Reliability check (psych::alpha) for the RAS scale: columns 5-8 of dulieu2.
psych::alpha(dulieu2[,5:8])
##
## Reliability analysis
## Call: psych::alpha(x = dulieu2[, 5:8])
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.83 0.83 0.83 0.55 5 0.022 3.8 0.99 0.57
##
## lower alpha upper 95% confidence boundaries
## 0.79 0.83 0.88
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## RAS1 0.70 0.70 0.61 0.44 2.3 0.043 0.0045 0.41
## RAS2 0.81 0.81 0.78 0.59 4.3 0.027 0.0258 0.62
## RAS3 0.82 0.82 0.76 0.60 4.5 0.026 0.0061 0.62
## RAS4 0.81 0.81 0.80 0.59 4.4 0.027 0.0331 0.66
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## RAS1 150 0.93 0.92 0.93 0.85 3.6 1.2
## RAS2 150 0.79 0.79 0.69 0.61 3.9 1.3
## RAS3 150 0.77 0.77 0.70 0.59 3.6 1.2
## RAS4 150 0.77 0.78 0.66 0.60 4.0 1.2
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## RAS1 0.09 0.07 0.29 0.25 0.29 0
## RAS2 0.09 0.07 0.15 0.31 0.39 0
## RAS3 0.04 0.13 0.39 0.07 0.37 0
## RAS4 0.06 0.07 0.11 0.35 0.41 0
# Reliability check (psych::alpha) for the COM scale: columns 9-12 of dulieu2.
dulieu2 %>% select(9:12) %>% psych::alpha()
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.9 0.9 0.9 0.7 9.4 0.013 3.8 0.98 0.69
##
## lower alpha upper 95% confidence boundaries
## 0.88 0.9 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## COM1 0.84 0.84 0.78 0.64 5.4 0.022 0.00172 0.63
## COM2 0.89 0.89 0.88 0.73 8.2 0.016 0.01852 0.71
## COM3 0.87 0.87 0.82 0.70 6.8 0.018 0.00011 0.69
## COM4 0.89 0.89 0.87 0.73 8.2 0.016 0.01764 0.69
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## COM1 150 0.93 0.93 0.93 0.87 3.7 1.1
## COM2 150 0.85 0.85 0.77 0.74 3.9 1.1
## COM3 150 0.89 0.89 0.87 0.79 3.7 1.1
## COM4 150 0.85 0.85 0.78 0.74 4.0 1.1
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## COM1 0.03 0.09 0.33 0.25 0.30 0
## COM2 0.03 0.13 0.11 0.35 0.37 0
## COM3 0.03 0.08 0.45 0.07 0.37 0
## COM4 0.03 0.12 0.11 0.34 0.40 0
# Reliability check (psych::alpha) for the PAM scale: columns 13-16 of dulieu2.
dulieu2 %>% select(c(13,14,15,16)) %>% psych::alpha()
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.84 0.61 6.2 0.019 3.8 0.92 0.62
##
## lower alpha upper 95% confidence boundaries
## 0.82 0.86 0.9
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## PAM1 0.77 0.77 0.69 0.52 3.3 0.033 0.0027 0.54
## PAM2 0.83 0.83 0.80 0.62 4.9 0.025 0.0201 0.67
## PAM3 0.84 0.84 0.78 0.64 5.2 0.023 0.0037 0.67
## PAM4 0.85 0.85 0.81 0.65 5.6 0.022 0.0104 0.68
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## PAM1 150 0.91 0.92 0.91 0.84 3.7 1.1
## PAM2 150 0.83 0.83 0.73 0.68 3.9 1.1
## PAM3 150 0.82 0.82 0.74 0.67 3.7 1.2
## PAM4 150 0.80 0.80 0.70 0.64 4.0 1.1
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## PAM1 0.02 0.11 0.31 0.28 0.27 0
## PAM2 0.02 0.15 0.09 0.40 0.34 0
## PAM3 0.01 0.14 0.39 0.09 0.37 0
## PAM4 0.01 0.15 0.09 0.33 0.43 0
# Reliability check (psych::alpha) for the TAD scale: columns 17-20 of dulieu2.
dulieu2 %>% select(17:20) %>% psych::alpha()
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.86 0.62 6.4 0.018 3.8 0.93 0.6
##
## lower alpha upper 95% confidence boundaries
## 0.83 0.86 0.9
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## TAD1 0.76 0.76 0.68 0.52 3.2 0.034 0.0014 0.52
## TAD2 0.85 0.85 0.84 0.65 5.6 0.022 0.0309 0.65
## TAD3 0.83 0.83 0.77 0.62 4.9 0.024 0.0037 0.65
## TAD4 0.86 0.86 0.84 0.67 6.1 0.021 0.0236 0.66
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## TAD1 150 0.93 0.93 0.94 0.87 3.7 1.1
## TAD2 150 0.81 0.81 0.70 0.66 4.0 1.1
## TAD3 150 0.85 0.84 0.81 0.71 3.6 1.2
## TAD4 150 0.79 0.79 0.68 0.63 4.0 1.1
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## TAD1 0.04 0.09 0.30 0.31 0.27 0
## TAD2 0.03 0.12 0.05 0.45 0.35 0
## TAD3 0.03 0.14 0.35 0.15 0.32 0
## TAD4 0.02 0.11 0.12 0.39 0.36 0
V Kiểm định cần thiết
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Kaiser-Meyer-Olkin test of sampling adequacy on all 20 items
# (reports the overall MSA and one MSA per item).
KMO(dulieu2)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = dulieu2)
## Overall MSA = 0.7
## MSA for each item =
## SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3 PAM4
## 0.65 0.80 0.67 0.79 0.62 0.66 0.60 0.73 0.66 0.79 0.68 0.81 0.68 0.84 0.68 0.77
## TAD1 TAD2 TAD3 TAD4
## 0.63 0.79 0.62 0.73
# Bartlett's test on the item data; the raw data frame is passed, so the
# function derives the correlation matrix itself.
cortest.bartlett(R = dulieu2)
## R was not square, finding R from data
## $chisq
## [1] 1834.681
##
## $p.value
## [1] 1.237199e-266
##
## $df
## [1] 190
VI Phân tích nhân tố
library(GPArotation)
# Principal component analysis on the standardised items. The argument is
# written out as `scale.` (prcomp()'s real parameter name) instead of relying
# on partial matching of `scale`.
pca <- prcomp(dulieu2, scale. = TRUE)
# Print the component standard deviations and the loading (rotation) matrix.
pca
## Standard deviations (1, .., p=20):
## [1] 1.8924523 1.8001221 1.6842887 1.6140053 1.4964414 0.8763247 0.8388667
## [8] 0.7794366 0.7053428 0.6836881 0.6654456 0.6491739 0.6078680 0.5660541
## [15] 0.4976879 0.4451576 0.3995541 0.3297970 0.3136155 0.2934569
##
## Rotation (n x k) = (20 x 20):
## PC1 PC2 PC3 PC4 PC5 PC6
## SAT1 -0.28710124 0.2220401 -0.21637375 0.024205663 -0.35355132 0.12026415
## SAT2 -0.23155529 0.2649554 -0.10770054 -0.051243731 -0.31962761 -0.11172325
## SAT3 -0.29573856 0.2039781 -0.21361032 -0.003950633 -0.29063727 0.13615981
## SAT4 -0.24750828 0.1858605 -0.17850663 -0.071340662 -0.27302013 -0.25146895
## RAS1 0.12597519 -0.3420626 0.11246452 0.074303983 -0.41350372 0.13446443
## RAS2 0.08939154 -0.3192606 0.10694571 0.030753821 -0.31553673 -0.32414827
## RAS3 0.08574749 -0.2754768 0.06803262 0.029086795 -0.37850473 0.55932439
## RAS4 0.14798485 -0.2889662 0.02119947 0.085582936 -0.33076420 -0.38313422
## COM1 0.32668648 0.2257259 -0.14891414 0.299284931 -0.11827861 0.10857508
## COM2 0.32681502 0.2147279 -0.10268569 0.241981119 -0.06177128 -0.19328309
## COM3 0.33461628 0.2124067 -0.12851155 0.266401120 -0.07990923 0.02266031
## COM4 0.28785031 0.2281613 -0.10283503 0.302923126 -0.01299116 0.01394303
## PAM1 -0.26902725 -0.1497459 -0.02455888 0.433908471 0.09860167 0.03874956
## PAM2 -0.24167616 -0.1155474 -0.01341206 0.389347239 0.13486790 -0.01542083
## PAM3 -0.20694002 -0.1172711 0.01356218 0.420501221 0.08363126 0.27537116
## PAM4 -0.26613558 -0.1283162 -0.01444170 0.351076977 0.06038844 -0.32374818
## TAD1 -0.06600491 0.1947318 0.49813986 0.092564915 -0.08339359 -0.11488358
## TAD2 -0.04366643 0.2481526 0.40538651 -0.011184740 -0.04234324 0.18458173
## TAD3 -0.03273260 0.1739609 0.44532009 0.087851605 -0.11589642 -0.15206789
## TAD4 -0.04463748 0.1898232 0.40391636 0.099156338 -0.07428536 0.07655294
## PC7 PC8 PC9 PC10 PC11 PC12
## SAT1 -0.13803240 0.13236255 -0.211350316 0.05119322 -0.141304042 0.052366492
## SAT2 0.24242199 0.04676304 0.396913136 -0.08288688 0.004737086 -0.005075803
## SAT3 -0.29644195 0.30060183 -0.295200273 -0.02552462 -0.067764761 0.066857734
## SAT4 0.28233444 -0.50855533 0.144786823 0.10034154 0.106408430 -0.175974557
## RAS1 -0.01244359 -0.04945080 0.032536927 -0.07503037 -0.002051218 0.026458100
## RAS2 0.34853668 0.14506299 -0.170901472 -0.48543268 -0.344351659 -0.026463047
## RAS3 -0.15935276 -0.07071634 0.363716686 -0.04460158 0.215256990 -0.047112476
## RAS4 -0.10749216 -0.02246508 -0.156463901 0.66656640 0.033260066 0.207794844
## COM1 0.03331994 -0.18910677 -0.141605234 -0.20380026 0.086966152 0.024153378
## COM2 -0.05139932 0.31704926 0.281889010 0.07591575 0.036439923 -0.225994918
## COM3 -0.01223960 -0.21807928 -0.414327477 -0.15546413 0.127746346 0.077714327
## COM4 0.04950189 0.07165789 0.355099824 0.14762421 -0.253816790 0.143366470
## PAM1 0.13565006 0.01207459 0.007292349 -0.11322547 0.002141764 -0.115885895
## PAM2 -0.23346440 -0.09822032 0.203987127 -0.01373062 -0.533409410 0.263685994
## PAM3 0.33870420 -0.07390963 -0.222368203 0.32439624 0.007344842 -0.376578449
## PAM4 -0.17257916 0.03804639 0.100867424 -0.22869074 0.609558605 0.302269121
## TAD1 -0.11209634 -0.07994565 -0.028463136 0.05360291 -0.086025364 -0.210982028
## TAD2 0.20646880 -0.28636638 -0.033515554 0.03015811 -0.053838449 0.579195642
## TAD3 -0.45733151 -0.16151326 -0.004670854 -0.12053629 -0.028151665 -0.362227687
## TAD4 0.32847748 0.53117558 -0.046154058 0.10050940 0.202981477 0.097047050
## PC13 PC14 PC15 PC16 PC17
## SAT1 -0.106937458 0.07298397 -0.05190413 -0.0428869143 -0.116943948
## SAT2 0.651535294 0.08743057 -0.21153503 0.1805961019 -0.023316939
## SAT3 -0.148228430 0.11421886 0.10780359 -0.0102949642 0.061409406
## SAT4 -0.411605172 -0.30038241 0.16894279 -0.0908959152 0.112862310
## RAS1 -0.091565429 0.02007611 0.08395996 0.5601985919 0.442751062
## RAS2 -0.127018359 0.08961031 -0.01081394 -0.1263012604 -0.265495857
## RAS3 -0.010174060 -0.10503915 0.01254775 -0.3522564974 -0.241273076
## RAS4 0.204237084 0.02383063 -0.06008368 -0.1911121335 -0.008069188
## COM1 0.096240679 -0.09844933 -0.05215839 -0.0761941306 -0.177571810
## COM2 -0.005860717 0.13426761 0.67447632 0.0356959797 -0.079468304
## COM3 0.228198968 -0.23042167 -0.05291890 0.0006315171 0.139309181
## COM4 -0.421140834 0.22337988 -0.51072030 0.0117390061 0.104067720
## PAM1 0.139766011 0.24285326 0.06217200 -0.4991915523 0.565010323
## PAM2 0.110254877 -0.48683096 0.16539940 0.0959878798 -0.106095931
## PAM3 0.002406408 0.15632352 -0.01830424 0.3332414202 -0.323712448
## PAM4 -0.164832058 0.09840823 -0.09186917 0.1485629835 -0.225229222
## TAD1 -0.002289379 0.03855802 -0.09378872 -0.2170390291 -0.215068228
## TAD2 -0.011251143 0.36301871 0.33941012 0.0111343426 -0.024186635
## TAD3 0.015777835 0.05439403 -0.11040682 0.1424424975 0.144700065
## TAD4 -0.091452916 -0.51640624 -0.06328452 -0.0230290039 0.150348098
## PC18 PC19 PC20
## SAT1 -0.054459005 0.7273736810 -0.0884545839
## SAT2 0.011197099 -0.1036900085 -0.0311900481
## SAT3 0.015314739 -0.6255111314 0.0718754991
## SAT4 0.038235267 -0.0845562161 -0.0005973106
## RAS1 -0.347286256 0.0530967885 0.0279437323
## RAS2 0.176844013 -0.0479145565 -0.0460567126
## RAS3 0.155954388 -0.0580163567 -0.1437645205
## RAS4 0.091766817 -0.0327385208 0.1162748537
## COM1 -0.180013452 0.0197525181 0.7116083916
## COM2 0.004692165 0.0702654126 -0.1024362855
## COM3 0.098480481 -0.0636732827 -0.5843789260
## COM4 0.064142611 -0.0871989536 -0.1056113833
## PAM1 -0.035357353 0.0682858872 0.0593579661
## PAM2 0.003570740 -0.0321532082 -0.0009987273
## PAM3 0.133678353 -0.0850444891 -0.0338485161
## PAM4 -0.039694851 0.0247607488 -0.0621995268
## TAD1 -0.671646978 -0.1146167357 -0.1959827122
## TAD2 0.152484456 -0.0004124992 0.0091510350
## TAD3 0.507599275 0.0800078304 0.1536464024
## TAD4 0.107803919 0.0462924856 0.0936756735
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Variance extracted: eigenvalue, percentage of variance and cumulative
# percentage for each component of `pca`.
u <- get_eig(X = pca)
u
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.58137553 17.9068776 17.90688
## Dim.2 3.24043969 16.2021985 34.10908
## Dim.3 2.83682835 14.1841418 48.29322
## Dim.4 2.60501303 13.0250652 61.31828
## Dim.5 2.23933677 11.1966839 72.51497
## Dim.6 0.76794496 3.8397248 76.35469
## Dim.7 0.70369734 3.5184867 79.87318
## Dim.8 0.60752145 3.0376072 82.91079
## Dim.9 0.49750846 2.4875423 85.39833
## Dim.10 0.46742945 2.3371473 87.73548
## Dim.11 0.44281782 2.2140891 89.94956
## Dim.12 0.42142676 2.1071338 92.05670
## Dim.13 0.36950355 1.8475177 93.90422
## Dim.14 0.32041723 1.6020861 95.50630
## Dim.15 0.24769321 1.2384660 96.74477
## Dim.16 0.19816526 0.9908263 97.73559
## Dim.17 0.15964347 0.7982173 98.53381
## Dim.18 0.10876604 0.5438302 99.07764
## Dim.19 0.09835470 0.4917735 99.56942
## Dim.20 0.08611694 0.4305847 100.00000
# Add the component index as a column for plotting. The index is derived from
# the number of rows in `u` rather than hard-coded as 1:20, so it stays correct
# if the number of analysed items changes.
y <- u %>%
  mutate(NhanTo = seq_len(nrow(u)))
y
## eigenvalue variance.percent cumulative.variance.percent NhanTo
## 1 3.58137553 17.9068776 17.90688 1
## 2 3.24043969 16.2021985 34.10908 2
## 3 2.83682835 14.1841418 48.29322 3
## 4 2.60501303 13.0250652 61.31828 4
## 5 2.23933677 11.1966839 72.51497 5
## 6 0.76794496 3.8397248 76.35469 6
## 7 0.70369734 3.5184867 79.87318 7
## 8 0.60752145 3.0376072 82.91079 8
## 9 0.49750846 2.4875423 85.39833 9
## 10 0.46742945 2.3371473 87.73548 10
## 11 0.44281782 2.2140891 89.94956 11
## 12 0.42142676 2.1071338 92.05670 12
## 13 0.36950355 1.8475177 93.90422 13
## 14 0.32041723 1.6020861 95.50630 14
## 15 0.24769321 1.2384660 96.74477 15
## 16 0.19816526 0.9908263 97.73559 16
## 17 0.15964347 0.7982173 98.53381 17
## 18 0.10876604 0.5438302 99.07764 18
## 19 0.09835470 0.4917735 99.56942 19
## 20 0.08611694 0.4305847 100.00000 20
# Scree plot: eigenvalue against component index, line plus red point markers.
ggplot(y, aes(x = NhanTo, y = eigenvalue)) +
  geom_line() +
  geom_point(color = "red", size = 3)

# Choose the number of factors from the percentage of variance explained.
# `ncp` (number of components shown) is spelled out; the previous `n = 20`
# only worked through partial argument matching.
fviz_screeplot(pca, addlabels = TRUE, ncp = 20)

# Choose the number of factors from the eigenvalues.
fviz_screeplot(pca, addlabels = TRUE, ncp = 20, choice = "eigenvalue")

VII Ma trận xoay
# As found above, five factors are retained: extract five components and
# apply a varimax rotation.
xoay <- principal(r = dulieu2, nfactors = 5, rotate = "varimax")
xoay
## Principal Components Analysis
## Call: principal(r = dulieu2, nfactors = 5, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC5 RC3 RC4 RC2 h2 u2 com
## SAT1 0.01 0.93 -0.02 0.11 -0.04 0.87 0.13 1.0
## SAT2 0.00 0.81 0.14 -0.06 -0.08 0.69 0.31 1.1
## SAT3 -0.05 0.86 -0.05 0.10 -0.09 0.77 0.23 1.1
## SAT4 -0.09 0.77 -0.04 -0.02 -0.07 0.60 0.40 1.1
## RAS1 -0.01 -0.09 0.01 0.02 0.93 0.87 0.13 1.0
## RAS2 -0.10 -0.12 -0.01 0.01 0.77 0.62 0.38 1.1
## RAS3 -0.04 0.01 -0.02 -0.02 0.78 0.61 0.39 1.0
## RAS4 0.09 -0.09 -0.11 0.01 0.77 0.61 0.39 1.1
## COM1 0.93 0.03 -0.02 -0.05 0.03 0.87 0.13 1.0
## COM2 0.84 -0.06 0.01 -0.11 -0.01 0.72 0.28 1.0
## COM3 0.89 -0.04 -0.02 -0.08 0.01 0.79 0.21 1.0
## COM4 0.85 -0.06 0.03 0.02 -0.09 0.73 0.27 1.0
## PAM1 -0.06 0.04 -0.03 0.92 0.02 0.85 0.15 1.0
## PAM2 -0.05 0.00 -0.01 0.83 -0.05 0.69 0.31 1.0
## PAM3 0.01 -0.01 0.04 0.82 0.03 0.67 0.33 1.0
## PAM4 -0.11 0.09 -0.01 0.78 0.02 0.64 0.36 1.1
## TAD1 -0.03 -0.01 0.94 0.05 -0.01 0.88 0.12 1.0
## TAD2 -0.02 0.03 0.80 -0.12 -0.14 0.68 0.32 1.1
## TAD3 0.02 0.00 0.84 0.02 0.05 0.71 0.29 1.0
## TAD4 0.04 0.01 0.79 0.05 -0.03 0.62 0.38 1.0
##
## RC1 RC5 RC3 RC4 RC2
## SS loadings 3.14 2.89 2.89 2.88 2.71
## Proportion Var 0.16 0.14 0.14 0.14 0.14
## Cumulative Var 0.16 0.30 0.45 0.59 0.73
## Proportion Explained 0.22 0.20 0.20 0.20 0.19
## Cumulative Proportion 0.22 0.42 0.61 0.81 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 5 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.05
## with the empirical chi square 148.45 with prob < 0.0012
##
## Fit based upon off diagonal values = 0.96
# Use a loading cut-off of 0.55 and sort the items by component. The print()
# generic is called so that S3 dispatch picks the method for the object's
# class, instead of invoking print.psych() directly.
print(xoay, cut = 0.55, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = dulieu2, nfactors = 5, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item RC1 RC5 RC3 RC4 RC2 h2 u2 com
## COM1 9 0.93 0.87 0.13 1.0
## COM3 11 0.89 0.79 0.21 1.0
## COM4 12 0.85 0.73 0.27 1.0
## COM2 10 0.84 0.72 0.28 1.0
## SAT1 1 0.93 0.87 0.13 1.0
## SAT3 3 0.86 0.77 0.23 1.1
## SAT2 2 0.81 0.69 0.31 1.1
## SAT4 4 0.77 0.60 0.40 1.1
## TAD1 17 0.94 0.88 0.12 1.0
## TAD3 19 0.84 0.71 0.29 1.0
## TAD2 18 0.80 0.68 0.32 1.1
## TAD4 20 0.79 0.62 0.38 1.0
## PAM1 13 0.92 0.85 0.15 1.0
## PAM2 14 0.83 0.69 0.31 1.0
## PAM3 15 0.82 0.67 0.33 1.0
## PAM4 16 0.78 0.64 0.36 1.1
## RAS1 5 0.93 0.87 0.13 1.0
## RAS3 7 0.78 0.61 0.39 1.0
## RAS2 6 0.77 0.62 0.38 1.1
## RAS4 8 0.77 0.61 0.39 1.1
##
## RC1 RC5 RC3 RC4 RC2
## SS loadings 3.14 2.89 2.89 2.88 2.71
## Proportion Var 0.16 0.14 0.14 0.14 0.14
## Cumulative Var 0.16 0.30 0.45 0.59 0.73
## Proportion Explained 0.22 0.20 0.20 0.20 0.19
## Cumulative Proportion 0.22 0.42 0.61 0.81 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 5 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.05
## with the empirical chi square 148.45 with prob < 0.0012
##
## Fit based upon off diagonal values = 0.96
VIII Ước lượng nhân tố
# Component scores of each respondent on the components of `pca`.
uocluong <- predict(object = pca, newdata = dulieu2)
# Preview the first rows of the score matrix.
head(x = uocluong)
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 -2.027451 -0.3087310 2.7204660 -0.6181991 2.2289661 0.6461760 -0.6027724
## 2 1.890772 3.6476452 -1.0269968 -1.5938388 -0.7063449 -0.4935091 -0.3686621
## 3 -1.184711 -0.2423145 -1.4049121 0.3801400 -1.4650525 1.0210511 0.3076885
## 4 -1.979745 -0.2035598 -0.7188705 1.1974250 0.9253456 0.4146206 1.1024258
## 5 2.544398 -0.6033350 -2.5742686 0.4079847 0.7733712 -0.5022852 -0.6598850
## 6 1.308336 -2.4337218 1.5070800 -0.3854037 1.5846968 -1.2309604 0.2446514
## PC8 PC9 PC10 PC11 PC12 PC13
## 1 0.02571746 -2.37086833 0.396807339 -0.08423920 -0.3908935 -0.87590789
## 2 -0.29426217 -0.21006244 1.466026170 0.15206048 -0.3240190 0.53364945
## 3 -0.38076150 0.43922187 0.242283258 -0.15303352 -0.3368169 -0.07553634
## 4 1.33069726 0.03238643 -0.108806392 -0.08818959 0.5196258 -0.45926979
## 5 -0.79243004 0.25815926 -0.400087932 -1.19454824 -0.4670095 -0.01799287
## 6 0.54106156 -0.59200419 0.004466051 -0.40110941 0.4921677 0.21579935
## PC14 PC15 PC16 PC17 PC18 PC19
## 1 -0.07206824 0.2917226 0.243580891 0.24054891 -0.003806319 0.3772994
## 2 -0.17161063 -0.4803949 -1.137895332 -0.32114601 0.338607503 0.4002341
## 3 0.66940820 0.3096231 0.096058982 -0.55779209 0.198253694 -0.1396824
## 4 -0.13890899 0.5031072 0.009476989 0.07560331 0.024599057 -0.5067870
## 5 -0.80351038 0.1435355 0.564288322 0.23315915 0.113136360 -0.0946332
## 6 0.24336639 0.6551518 0.330943041 0.69359086 -0.895532238 0.1777442
## PC20
## 1 -0.2147253
## 2 0.1286330
## 3 0.2659435
## 4 -0.1363060
## 5 0.1781751
## 6 0.2410815
# Preview the raw item responses again.
head(x = dulieu2)
## SAT1 SAT2 SAT3 SAT4 RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3
## 1 4 1 4 3 3 3 2 3 2 2 3 2 4 4 5
## 2 5 5 4 5 1 2 3 5 5 5 5 5 2 2 2
## 3 5 5 5 5 4 4 5 4 4 4 3 4 4 4 5
## 4 4 4 5 4 3 4 3 3 3 4 3 4 5 5 5
## 5 3 3 3 4 4 4 3 4 5 5 5 5 3 5 3
## 6 2 2 2 2 5 5 2 5 3 4 3 3 4 4 3
## PAM4 TAD1 TAD2 TAD3 TAD4
## 1 4 5 5 5 5
## 2 2 4 4 4 4
## 3 4 3 4 3 3
## 4 5 3 4 2 5
## 5 3 2 2 3 2
## 6 4 4 4 3 4
# Turn the score matrix into a data frame and keep the first five components.
uocluong2 <- data.frame(uocluong)
uocluong3 <- uocluong2[, 1:5]
# NOTE(review): these are scores on the UNROTATED components PC1..PC5, but the
# labels follow the order of the rotated components printed earlier
# (RC1, RC5, RC3, RC4, RC2). The unrotated loadings printed for `pca` mix
# several item groups per component -- confirm this labelling is intended.
names(uocluong3) <- c("COM", "SAT", "TAD", "PAM", "RAS")
head(x = uocluong3)
## COM SAT TAD PAM RAS
## 1 -2.027451 -0.3087310 2.7204660 -0.6181991 2.2289661
## 2 1.890772 3.6476452 -1.0269968 -1.5938388 -0.7063449
## 3 -1.184711 -0.2423145 -1.4049121 0.3801400 -1.4650525
## 4 -1.979745 -0.2035598 -0.7188705 1.1974250 0.9253456
## 5 2.544398 -0.6033350 -2.5742686 0.4079847 0.7733712
## 6 1.308336 -2.4337218 1.5070800 -0.3854037 1.5846968
IX Hồi quy OLS
# Regress the column labelled SAT on the other four component-score columns.
# NOTE(review): all five columns are scores from the same prcomp() fit, and
# principal-component scores are uncorrelated with one another, which matches
# the printed result (coefficients around 1e-16, R-squared around 4e-31).
# This fit therefore says nothing about the constructs; the extension section
# further down fits separate PCAs for the predictors and the response.
ols <-lm(SAT ~ COM + TAD + PAM + RAS, data=uocluong3)
summary(ols)
##
## Call:
## lm(formula = SAT ~ COM + TAD + PAM + RAS, data = uocluong3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3386 -1.3264 -0.1159 1.3118 4.7138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.252e-17 1.490e-01 0 1
## COM 4.229e-16 7.899e-02 0 1
## TAD -2.869e-16 8.876e-02 0 1
## PAM -1.493e-16 9.262e-02 0 1
## RAS 5.349e-16 9.990e-02 0 1
##
## Residual standard error: 1.825 on 145 degrees of freedom
## Multiple R-squared: 3.891e-31, Adjusted R-squared: -0.02759
## F-statistic: 1.411e-29 on 4 and 145 DF, p-value: 1
X Mở rộng
# Independent variables: the items in columns 5-20 (RAS, COM, PAM and TAD).
solieu1 <- select(dulieu2, 5:20)
head(x = solieu1)
## RAS1 RAS2 RAS3 RAS4 COM1 COM2 COM3 COM4 PAM1 PAM2 PAM3 PAM4 TAD1 TAD2 TAD3
## 1 3 3 2 3 2 2 3 2 4 4 5 4 5 5 5
## 2 1 2 3 5 5 5 5 5 2 2 2 2 4 4 4
## 3 4 4 5 4 4 4 3 4 4 4 5 4 3 4 3
## 4 3 4 3 3 3 4 3 4 5 5 5 5 3 4 2
## 5 4 4 3 4 5 5 5 5 3 5 3 3 2 2 3
## 6 5 5 2 5 3 4 3 3 4 4 3 4 4 4 3
## TAD4
## 1 5
## 2 4
## 3 3
## 4 5
## 5 2
## 6 4
# PCA on the standardised independent-variable items only. `scale.` is
# spelled out in full rather than partially matched from `scale`.
pca1 <- prcomp(solieu1, scale. = TRUE)
# Component scores for every respondent.
luu1 <- predict(pca1, newdata = solieu1)
head(luu1)
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 2.3874770 2.9659111 0.6881738 0.7468256 0.1748730 0.3138086 -0.98134786
## 2 -3.9265125 0.7218024 0.8523711 1.4195603 -0.5124710 -0.4013547 0.01655909
## 3 0.6295641 -1.1517060 -0.4303911 -0.3055716 1.1863367 -0.2801499 0.12953984
## 4 1.6223557 0.2643848 -1.8117744 0.4810545 0.5578136 1.5378236 0.68343481
## 5 -1.9466703 -2.6074964 -1.2601078 0.7970574 -0.5618247 -0.8465233 -0.30444719
## 6 0.9221323 -0.4334297 0.8428584 -0.2521046 -1.3132127 0.9796125 -0.43965052
## PC8 PC9 PC10 PC11 PC12 PC13
## 1 -0.4792780 0.3740181 -0.8693007 -0.26035969 0.16468992 0.5760037
## 2 -1.3793282 0.6119236 -0.3493153 -0.17164864 0.15977475 -1.2328603
## 3 -0.2075910 -0.1759290 -0.1142793 0.77890427 -0.32750731 0.1116065
## 4 0.2458596 -0.1458688 0.5377063 -0.23796443 -0.32264723 0.1271438
## 5 0.1028150 -1.3416082 -0.4933328 -0.67464841 0.08182104 0.5958388
## 6 -0.2449230 -0.3465075 0.2416352 -0.08000454 -0.65632731 0.4375325
## PC14 PC15 PC16
## 1 0.34331438 0.06470765 0.2775406
## 2 -0.34679390 0.35132641 -0.2098471
## 3 -0.57704395 0.20013724 -0.3038324
## 4 0.01277471 0.01392121 0.2017119
## 5 0.16943082 0.07061568 -0.1729256
## 6 0.76361391 -0.88491960 -0.1716738
# Keep the first four component scores as a data frame.
luu1 <- data.frame(luu1)[, 1:4]
# NOTE(review): PC1..PC4 are unrotated components of the 16-item PCA; the
# loadings of `pca1` are not inspected here, so it is unverified that each
# component corresponds to the construct it is named after -- confirm.
names(luu1) <- c("COM", "TAD", "PAM", "RAS")
# Dependent variable: the four SAT items (columns 1-4).
solieu2 <- select(dulieu2, 1:4)
head(x = solieu2)
## SAT1 SAT2 SAT3 SAT4
## 1 4 1 4 3
## 2 5 5 4 5
## 3 5 5 5 5
## 4 4 4 5 4
## 5 3 3 3 4
## 6 2 2 2 2
# PCA on the standardised SAT items only. `scale.` is spelled out in full
# rather than partially matched from `scale`.
pca2 <- prcomp(solieu2, scale. = TRUE)
# Component scores for every respondent.
luu2 <- predict(pca2, newdata = solieu2)
head(luu2)
## PC1 PC2 PC3 PC4
## 1 1.3591519 1.55684951 -1.45729207 0.27280244
## 2 -1.4704884 -0.44136583 0.08034238 0.51644106
## 3 -1.9007400 0.03018929 -0.01381734 -0.01880041
## 4 -0.6326041 0.56343499 -0.09187708 -0.55465713
## 5 1.1209501 -0.38297761 -0.47383882 -0.10463499
## 6 3.1944226 0.21526101 0.03448260 -0.02064618
# Keep only the first component score as the SAT measure (one-column data frame).
luu2 <- data.frame(luu2)[, 1, drop = FALSE]
# NOTE(review): the sign of a principal component is arbitrary. In the preview
# printed just before this step, the respondent answering 2 on every SAT item
# scores about 3.19 and the one answering 5 on every item scores about -1.90,
# so this score appears to fall as satisfaction rises -- confirm before
# interpreting coefficient signs.
names(luu2) <- "SAT"
# Combine the predictor scores and the SAT score column-wise into one data frame.
luu <- cbind(luu1, luu2)
head(x = luu)
## COM TAD PAM RAS SAT
## 1 2.3874770 2.9659111 0.6881738 0.7468256 1.3591519
## 2 -3.9265125 0.7218024 0.8523711 1.4195603 -1.4704884
## 3 0.6295641 -1.1517060 -0.4303911 -0.3055716 -1.9007400
## 4 1.6223557 0.2643848 -1.8117744 0.4810545 -0.6326041
## 5 -1.9466703 -2.6074964 -1.2601078 0.7970574 1.1209501
## 6 0.9221323 -0.4334297 0.8428584 -0.2521046 3.1944226
# OLS regression of the SAT score on the four predictor scores, followed by
# the coefficient table and fit statistics.
ols <- lm(SAT ~ COM + TAD + PAM + RAS, data = luu)
summary(object = ols)
##
## Call:
## lm(formula = SAT ~ COM + TAD + PAM + RAS, data = luu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6964 -1.1821 -0.1110 0.5889 5.1407
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.290e-16 1.375e-01 0.000 1.000
## COM -7.230e-02 7.480e-02 -0.967 0.335
## TAD -1.086e-01 7.982e-02 -1.361 0.176
## PAM 8.869e-02 8.553e-02 1.037 0.301
## RAS -1.282e-01 8.609e-02 -1.490 0.139
##
## Residual standard error: 1.684 on 145 degrees of freedom
## Multiple R-squared: 0.04024, Adjusted R-squared: 0.01377
## F-statistic: 1.52 on 4 and 145 DF, p-value: 0.1994