6 因素分析與轉軸
dat<-read.table('CESD.dat', header=T)
str(dat)
## 'data.frame': 1000 obs. of 20 variables:
## $ 悲傷 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ 恐懼 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ 寂寞 : int 2 0 1 0 0 0 0 1 0 1 ...
## $ 痛哭 : int 1 1 1 0 1 0 0 0 0 0 ...
## $ 煩惱 : int 2 0 2 0 1 0 0 1 0 0 ...
## $ 失敗 : int 1 0 1 0 1 0 0 0 0 0 ...
## $ 悶悶不樂: int 1 0 2 0 0 0 0 0 0 1 ...
## $ 困擾 : int 0 1 1 0 0 0 0 0 0 1 ...
## $ 費力 : int 1 1 2 1 0 1 0 0 0 1 ...
## $ 不專心 : int 0 1 1 1 1 1 0 0 0 1 ...
## $ 睡眠 : int 1 1 0 0 0 1 0 0 0 1 ...
## $ 寡言 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ 無力 : int 1 1 2 2 1 1 0 0 0 2 ...
## $ 胃口不好: int 0 1 0 2 1 1 0 1 0 0 ...
## $ 不友善 : int 1 1 0 1 0 0 0 0 0 0 ...
## $ 不喜歡 : int 1 0 2 0 0 0 0 0 0 0 ...
## $ 良好 : int 3 2 3 3 3 3 0 3 0 2 ...
## $ 樂趣 : int 3 2 2 2 1 2 0 3 0 1 ...
## $ 快樂 : int 2 1 2 1 2 2 0 3 0 1 ...
## $ 希望 : int 2 1 2 2 3 3 0 3 0 3 ...
require(psych)
## Loading required package: psych
fa.parallel(dat, fa = "pc", show.legend = FALSE)

## Parallel analysis suggests that the number of factors = NA and the number of components = 2
print.psych(fa(dat, nfactor = 2, fm = "pa", rotate = "promax"), cut = .3)
## Loading required namespace: GPArotation
## Factor Analysis using method = pa
## Call: fa(r = dat, nfactors = 2, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## 悲傷 0.84 0.72 0.28 1.0
## 恐懼 0.80 0.63 0.37 1.0
## 寂寞 0.78 0.62 0.38 1.0
## 痛哭 0.73 0.52 0.48 1.0
## 煩惱 0.68 0.42 0.58 1.0
## 失敗 0.61 0.49 0.51 1.2
## 悶悶不樂 0.78 0.68 0.32 1.0
## 困擾 0.56 0.29 0.71 1.0
## 費力 0.66 0.43 0.57 1.0
## 不專心 0.66 0.40 0.60 1.0
## 睡眠 0.67 0.43 0.57 1.0
## 寡言 0.52 0.30 0.70 1.0
## 無力 0.59 0.38 0.62 1.0
## 胃口不好 0.51 0.26 0.74 1.0
## 不友善 0.58 0.32 0.68 1.0
## 不喜歡 0.67 0.50 0.50 1.0
## 良好 0.58 0.32 0.68 1.0
## 樂趣 0.80 0.61 0.39 1.0
## 快樂 0.83 0.72 0.28 1.0
## 希望 0.73 0.55 0.45 1.0
##
## PA1 PA2
## SS loadings 7.27 2.30
## Proportion Var 0.36 0.11
## Cumulative Var 0.36 0.48
## Proportion Explained 0.76 0.24
## Cumulative Proportion 0.76 1.00
##
## With factor correlations of
## PA1 PA2
## PA1 1.00 0.42
## PA2 0.42 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 190 and the objective function was 10.19 with Chi Square of 10099.94
## The degrees of freedom for the model are 151 and the objective function was 0.88
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 1000 with the empirical chi square 502.28 with prob < 2.6e-39
## The total number of observations was 1000 with Likelihood Chi Square = 875.06 with prob < 2.3e-102
##
## Tucker Lewis Index of factoring reliability = 0.908
## RMSEA index = 0.07 and the 90 % confidence intervals are 0.065 0.074
## BIC = -168.01
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## PA1 PA2
## Correlation of (regression) scores with factors 0.97 0.93
## Multiple R square of scores with factors 0.94 0.86
## Minimum correlation of possible factor scores 0.88 0.72
5 區辨函數
dta <- read.table("JobPersonality.txt",head=T)
str(dta)
## 'data.frame': 244 obs. of 4 variables:
## $ JOB : int 1 1 1 1 1 1 1 1 1 1 ...
## $ OUTDOOR : int 10 14 19 14 14 20 6 13 18 16 ...
## $ SOCIAL : int 22 17 33 29 25 25 18 27 31 35 ...
## $ CONSERVATIVE: int 5 6 7 12 7 12 4 7 9 13 ...
library(MASS)
fit <- lda(JOB ~ OUTDOOR+SOCIAL+CONSERVATIVE, data=dta)
fit
## Call:
## lda(JOB ~ OUTDOOR + SOCIAL + CONSERVATIVE, data = dta)
##
## Prior probabilities of groups:
## 1 2 3
## 0.3483607 0.3811475 0.2704918
##
## Group means:
## OUTDOOR SOCIAL CONSERVATIVE
## 1 12.51765 24.22353 9.023529
## 2 18.53763 21.13978 10.139785
## 3 15.57576 15.45455 13.242424
##
## Coefficients of linear discriminants:
## LD1 LD2
## OUTDOOR 0.09198065 -0.22501431
## SOCIAL -0.19427415 -0.04978105
## CONSERVATIVE 0.15499199 0.08734288
##
## Proportion of trace:
## LD1 LD2
## 0.7712 0.2288
fit$scaling
## LD1 LD2
## OUTDOOR 0.09198065 -0.22501431
## SOCIAL -0.19427415 -0.04978105
## CONSERVATIVE 0.15499199 0.08734288
table(predict(fit,newdata =dta)$class , dta$JOB)
##
## 1 2 3
## 1 68 16 3
## 2 13 67 13
## 3 4 10 50
3 典型相關分析
dta <- read.table("interest.txt",head=T)
c <- dta[, 1:6]
o <- dta[, 7:24]
cor(o, c)# correlations
## socdom sociabty stress worry impulsve
## carpentr -0.02655005 0.003410094 0.04521771 0.014433468 0.067667112
## forestr 0.02725324 -0.026765751 -0.06715231 -0.108437803 0.088864778
## morticin 0.11820489 0.067581761 0.12030452 0.164139591 -0.062807848
## policemn -0.16396093 -0.221249337 0.02471191 0.001534416 0.213318537
## fireman -0.11011277 -0.160250335 -0.05833266 -0.096177857 0.171354498
## salesrep 0.10228283 0.114099861 0.12193224 0.122921806 0.058451390
## teacher 0.25800749 0.222603993 0.05881236 0.073260269 -0.079642912
## busexec 0.16692048 0.085727002 0.07554396 0.056706851 -0.040908120
## stockbrk 0.11213631 0.083486232 0.17100579 0.177674459 -0.044264704
## artist 0.07297586 0.046654544 0.08993349 -0.015093768 -0.088625281
## socworkr 0.20441372 0.235231262 0.07171255 0.087162295 -0.122426518
## truckdvr -0.09728773 -0.060229887 -0.14436082 -0.106540263 0.010379224
## doctor 0.17168029 0.181821452 0.07206254 0.102503492 -0.102827790
## clergymn 0.17872165 0.109124259 0.05161889 0.025933920 0.003002856
## actor 0.16935331 0.194134147 0.06281278 0.017834146 0.016812388
## lawyer 0.20960718 0.216391203 0.01961752 0.125692614 -0.113000711
## archtct 0.16948381 0.089977107 0.08594450 0.067541793 -0.023636384
## landscpr 0.06590459 0.059822828 -0.06899785 -0.098504846 0.104378146
## thrillsk
## carpentr 0.065088780
## forestr 0.023975205
## morticin 0.002726423
## policemn 0.168304673
## fireman 0.137353641
## salesrep 0.037179601
## teacher -0.062436080
## busexec -0.036766020
## stockbrk 0.012823461
## artist -0.044848256
## socworkr -0.055867851
## truckdvr -0.039882487
## doctor -0.084253405
## clergymn -0.054379205
## actor -0.020527488
## lawyer -0.011759623
## archtct -0.040691105
## landscpr 0.009815659
#can corr analysis
require(candisc)
## Loading required package: candisc
## Loading required package: car
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## Loading required package: heplots
##
## Attaching package: 'candisc'
## The following object is masked from 'package:stats':
##
## cancor
cc1 <- cancor(c, o, set.names=c("c", "o"))
cc1
##
## Canonical correlation analysis of:
## 6 c variables: socdom, sociabty, stress, worry, impulsve, thrillsk
## with 18 o variables: carpentr, forestr, morticin, policemn, fireman, salesrep, teacher, busexec, stockbrk, artist, socworkr, truckdvr, doctor, clergymn, actor, lawyer, archtct, landscpr
##
## CanR CanRSQ Eigen percent cum scree
## 1 0.4542 0.20627 0.25988 38.235 38.23 ******************************
## 2 0.4094 0.16759 0.20134 29.622 67.86 ***********************
## 3 0.3084 0.09511 0.10510 15.464 83.32 ************
## 4 0.2321 0.05386 0.05692 8.375 91.69 *******
## 5 0.1875 0.03517 0.03645 5.362 97.06 ****
## 6 0.1400 0.01961 0.02000 2.943 100.00 **
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## CanR LR test stat approx F numDF denDF Pr(> F)
## 1 0.45417 0.53507 1.39033 108 1302.40 0.006627 **
## 2 0.40938 0.67413 1.10168 85 1101.77 0.254085
## 3 0.30839 0.80985 0.77392 64 894.86 0.902452
## 4 0.23207 0.89497 0.57602 45 681.08 0.988553
## 5 0.18752 0.94591 0.46315 28 460.00 0.992216
## 6 0.14004 0.98039 0.35544 13 231.00 0.981521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#standarized coefficients 典型相關分析係數
coef(cc1, type="both", standardize=TRUE)
## [[1]]
## Xcan1 Xcan2 Xcan3 Xcan4 Xcan5
## socdom 0.11616690 0.48545341 -0.8121924 0.05953635 -0.7578881
## sociabty 0.71734183 0.07976508 0.4628032 0.25208246 0.8599887
## stress -0.13956999 0.63867240 0.1352582 -0.76161225 0.3885810
## worry 0.10403708 0.22125442 0.4957385 0.44817133 -0.5423593
## impulsve -0.59711733 0.33124032 -0.4448030 0.34227633 0.5224877
## thrillsk -0.06965451 0.12818479 0.5868898 0.40864768 -0.3548656
## Xcan6
## socdom 0.19095155
## sociabty -0.02152472
## stress 0.34871493
## worry -0.70675916
## impulsve -0.54357133
## thrillsk 0.83382010
##
## [[2]]
## Ycan1 Ycan2 Ycan3 Ycan4 Ycan5
## carpentr -0.03806223 0.29761130 0.39269181 0.07820669 0.06142642
## forestr -0.07556844 0.04333148 -0.26651722 -0.01855674 -0.19668223
## morticin -0.24032070 0.18295837 -0.01901644 0.26527614 -0.64687187
## policemn -0.56831667 0.46421926 0.28136104 0.17751664 -0.29508643
## fireman -0.24241914 -0.08230669 -0.28692920 0.31911172 0.18512522
## salesrep -0.13744029 0.45610975 0.13311642 0.05692872 0.46679305
## teacher -0.03474957 0.51116082 -0.41721057 0.38570572 -0.46953964
## busexec 0.10739717 0.05745125 -0.52836667 -0.23890231 0.01124895
## stockbrk -0.15964197 0.57375621 0.39819057 -0.22352599 0.26705045
## artist 0.08401221 -0.03573449 0.05099942 -0.60250043 -0.04821040
## socworkr 0.08471829 0.07997559 0.59593797 -0.09694941 0.44193633
## truckdvr 0.11120624 -0.38004952 -0.04299763 0.17905453 0.01350615
## doctor 0.27240246 -0.16754528 -0.11016365 -0.48830654 0.28847059
## clergymn -0.29287033 0.18798523 -0.51626591 -0.16553149 -0.13426428
## actor 0.24969449 0.27434960 0.02201902 0.32342029 0.49285548
## lawyer 0.70432532 -0.55721785 0.26288947 0.96781654 -0.52870640
## archtct -0.23377301 0.11984314 -0.34992175 -0.24916190 -0.28873903
## landscpr 0.08964156 0.24736280 -0.32860990 0.37333598 0.29885505
## Ycan6
## carpentr 0.269373775
## forestr 0.377868087
## morticin 0.003369288
## policemn -0.155457465
## fireman 0.601112645
## salesrep -0.273051757
## teacher -0.197784848
## busexec 0.134238755
## stockbrk -0.066826540
## artist 0.567653044
## socworkr 0.759324750
## truckdvr -0.353754478
## doctor -0.486546731
## clergymn -0.320914003
## actor -0.023369926
## lawyer 0.296090572
## archtct -0.149400882
## landscpr 0.003395754
redundancy(cc1)
##
## Redundancies for the c variables & total X canonical redundancy
##
## Xcan1 Xcan2 Xcan3 Xcan4 Xcan5 Xcan6 total X|Y
## 0.045509 0.036643 0.013075 0.010828 0.003268 0.002535 0.111857
##
## Redundancies for the o variables & total Y canonical redundancy
##
## Ycan1 Ycan2 Ycan3 Ycan4 Ycan5 Ycan6 total Y|X
## 0.0274356 0.0158775 0.0043984 0.0020075 0.0027820 0.0006899 0.0531909
plot(cc1, smooth=TRUE)

4 manova
dta <- read.table(file = "nmtwins.txt", header = TRUE)
str(dta)
## 'data.frame': 839 obs. of 6 variables:
## $ sex : int 2 2 2 1 1 2 1 1 2 1 ...
## $ english: int 14 20 11 9 15 20 26 28 24 19 ...
## $ math : int 13 20 8 19 23 17 20 31 25 19 ...
## $ socsci : int 17 16 15 7 23 16 27 31 17 17 ...
## $ natsci : int 18 16 16 10 21 12 23 29 21 22 ...
## $ vocab : int 14 13 12 6 21 12 28 30 22 16 ...
#以MANOVA,檢驗英文、數學、社會科學、自然科學與字彙成績,是否在性別上有差異。
#MANOVA
fit <- manova(cbind(english,math,socsci,natsci,vocab) ~ sex ,data=dta)
summary(fit, test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## sex 1 0.76565 50.993 5 833 < 2.2e-16 ***
## Residuals 837
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#檢測每一變項
summary.aov(fit)
## Response english :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 230.7 230.727 10.185 0.001469 **
## Residuals 837 18961.0 22.654
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response math :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 3154.2 3154.21 87.512 < 2.2e-16 ***
## Residuals 837 30168.2 36.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response socsci :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 268.9 268.869 11.461 0.0007438 ***
## Residuals 837 19635.6 23.459
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response natsci :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 1429.8 1429.81 44.811 3.974e-11 ***
## Residuals 837 26706.5 31.91
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response vocab :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 4 3.9581 0.1706 0.6797
## Residuals 837 19424 23.2064
#Post hoc : Step down analysis
#Bonferroni adjustment of alpha.
#alpha = .05 / 5 =0.01
#排序為:math、natsci、english、socsci、vocab
summary(fit1 <- lm(math ~ sex ,data=dta))
##
## Call:
## lm(formula = math ~ sex, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.1909 -4.1909 -0.1909 4.7398 15.7398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.1215 0.6961 38.960 <2e-16 ***
## sex -3.9306 0.4202 -9.355 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.004 on 837 degrees of freedom
## Multiple R-squared: 0.09466, Adjusted R-squared: 0.09358
## F-statistic: 87.51 on 1 and 837 DF, p-value: < 2.2e-16
summary(fit1 <- lm(natsci ~ math+sex ,data=dta))
##
## Call:
## lm(formula = natsci ~ math + sex, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.039 -2.798 0.526 3.102 11.526
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.22638 0.86461 9.515 <2e-16 ***
## math 0.58105 0.02559 22.703 <2e-16 ***
## sex -0.36250 0.32698 -1.109 0.268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.445 on 836 degrees of freedom
## Multiple R-squared: 0.4128, Adjusted R-squared: 0.4114
## F-statistic: 293.9 on 2 and 836 DF, p-value: < 2.2e-16
summary(fit1 <- lm(english ~ natsci+math+sex ,data=dta))
##
## Call:
## lm(formula = english ~ natsci + math + sex, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1069 -2.3353 0.1648 2.3502 9.7950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.12491 0.71445 2.974 0.00302 **
## natsci 0.36274 0.02715 13.362 < 2e-16 ***
## math 0.25770 0.02554 10.089 < 2e-16 ***
## sex 3.03594 0.25685 11.820 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.489 on 835 degrees of freedom
## Multiple R-squared: 0.4703, Adjusted R-squared: 0.4684
## F-statistic: 247.1 on 3 and 835 DF, p-value: < 2.2e-16
summary(fit1 <- lm( socsci~ english+natsci+math+sex ,data=dta))
##
## Call:
## lm(formula = socsci ~ english + natsci + math + sex, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.6120 -2.1759 0.0401 2.1663 10.7497
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.57021 0.67637 8.236 6.88e-16 ***
## english 0.36860 0.03259 11.310 < 2e-16 ***
## natsci 0.29312 0.02817 10.407 < 2e-16 ***
## math 0.11179 0.02548 4.388 1.29e-05 ***
## sex -0.32434 0.26133 -1.241 0.215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.286 on 834 degrees of freedom
## Multiple R-squared: 0.5476, Adjusted R-squared: 0.5454
## F-statistic: 252.4 on 4 and 834 DF, p-value: < 2.2e-16
summary(fit1 <- lm(vocab ~ socsci+english+natsci+math+sex ,data=dta))
##
## Call:
## lm(formula = vocab ~ socsci + english + natsci + math + sex,
## data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.559 -1.709 -0.008 1.862 8.169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.96816 0.60786 3.238 0.00125 **
## socsci 0.53030 0.02993 17.720 < 2e-16 ***
## english 0.22949 0.03025 7.587 8.76e-14 ***
## natsci 0.06119 0.02587 2.365 0.01826 *
## math 0.06417 0.02227 2.881 0.00406 **
## sex 0.63953 0.22606 2.829 0.00478 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.84 on 833 degrees of freedom
## Multiple R-squared: 0.6542, Adjusted R-squared: 0.6521
## F-statistic: 315.2 on 5 and 833 DF, p-value: < 2.2e-16