6 因素分析與轉軸

dat<-read.table('CESD.dat', header=T)
str(dat)

## 'data.frame':    1000 obs. of  20 variables:
##  $ 悲傷    : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ 恐懼    : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ 寂寞    : int  2 0 1 0 0 0 0 1 0 1 ...
##  $ 痛哭    : int  1 1 1 0 1 0 0 0 0 0 ...
##  $ 煩惱    : int  2 0 2 0 1 0 0 1 0 0 ...
##  $ 失敗    : int  1 0 1 0 1 0 0 0 0 0 ...
##  $ 悶悶不樂: int  1 0 2 0 0 0 0 0 0 1 ...
##  $ 困擾    : int  0 1 1 0 0 0 0 0 0 1 ...
##  $ 費力    : int  1 1 2 1 0 1 0 0 0 1 ...
##  $ 不專心  : int  0 1 1 1 1 1 0 0 0 1 ...
##  $ 睡眠    : int  1 1 0 0 0 1 0 0 0 1 ...
##  $ 寡言    : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ 無力    : int  1 1 2 2 1 1 0 0 0 2 ...
##  $ 胃口不好: int  0 1 0 2 1 1 0 1 0 0 ...
##  $ 不友善  : int  1 1 0 1 0 0 0 0 0 0 ...
##  $ 不喜歡  : int  1 0 2 0 0 0 0 0 0 0 ...
##  $ 良好    : int  3 2 3 3 3 3 0 3 0 2 ...
##  $ 樂趣    : int  3 2 2 2 1 2 0 3 0 1 ...
##  $ 快樂    : int  2 1 2 1 2 2 0 3 0 1 ...
##  $ 希望    : int  2 1 2 2 3 3 0 3 0 3 ...

require(psych)

## Loading required package: psych

fa.parallel(dat, fa = "pc", show.legend = FALSE)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2

print.psych(fa(dat, nfactor = 2, fm = "pa", rotate = "promax"), cut = .3)

## Loading required namespace: GPArotation

## Factor Analysis using method =  pa
## Call: fa(r = dat, nfactors = 2, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##            PA1   PA2   h2   u2 com
## 悲傷      0.84       0.72 0.28 1.0
## 恐懼      0.80       0.63 0.37 1.0
## 寂寞      0.78       0.62 0.38 1.0
## 痛哭      0.73       0.52 0.48 1.0
## 煩惱      0.68       0.42 0.58 1.0
## 失敗      0.61       0.49 0.51 1.2
## 悶悶不樂  0.78       0.68 0.32 1.0
## 困擾      0.56       0.29 0.71 1.0
## 費力      0.66       0.43 0.57 1.0
## 不專心    0.66       0.40 0.60 1.0
## 睡眠      0.67       0.43 0.57 1.0
## 寡言      0.52       0.30 0.70 1.0
## 無力      0.59       0.38 0.62 1.0
## 胃口不好  0.51       0.26 0.74 1.0
## 不友善    0.58       0.32 0.68 1.0
## 不喜歡    0.67       0.50 0.50 1.0
## 良好            0.58 0.32 0.68 1.0
## 樂趣            0.80 0.61 0.39 1.0
## 快樂            0.83 0.72 0.28 1.0
## 希望            0.73 0.55 0.45 1.0
## 
##                        PA1  PA2
## SS loadings           7.27 2.30
## Proportion Var        0.36 0.11
## Cumulative Var        0.36 0.48
## Proportion Explained  0.76 0.24
## Cumulative Proportion 0.76 1.00
## 
##  With factor correlations of 
##      PA1  PA2
## PA1 1.00 0.42
## PA2 0.42 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  190  and the objective function was  10.19 with Chi Square of  10099.94
## The degrees of freedom for the model are 151  and the objective function was  0.88 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  1000 with the empirical chi square  502.28  with prob <  2.6e-39 
## The total number of observations was  1000  with Likelihood Chi Square =  875.06  with prob <  2.3e-102 
## 
## Tucker Lewis Index of factoring reliability =  0.908
## RMSEA index =  0.07  and the 90 % confidence intervals are  0.065 0.074
## BIC =  -168.01
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.97 0.93
## Multiple R square of scores with factors          0.94 0.86
## Minimum correlation of possible factor scores     0.88 0.72

5 區辨函數

dta <- read.table("JobPersonality.txt",head=T)
str(dta)

## 'data.frame':    244 obs. of  4 variables:
##  $ JOB         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ OUTDOOR     : int  10 14 19 14 14 20 6 13 18 16 ...
##  $ SOCIAL      : int  22 17 33 29 25 25 18 27 31 35 ...
##  $ CONSERVATIVE: int  5 6 7 12 7 12 4 7 9 13 ...

library(MASS)
fit <- lda(JOB ~ OUTDOOR+SOCIAL+CONSERVATIVE, data=dta)
fit

## Call:
## lda(JOB ~ OUTDOOR + SOCIAL + CONSERVATIVE, data = dta)
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3483607 0.3811475 0.2704918 
## 
## Group means:
##    OUTDOOR   SOCIAL CONSERVATIVE
## 1 12.51765 24.22353     9.023529
## 2 18.53763 21.13978    10.139785
## 3 15.57576 15.45455    13.242424
## 
## Coefficients of linear discriminants:
##                      LD1         LD2
## OUTDOOR       0.09198065 -0.22501431
## SOCIAL       -0.19427415 -0.04978105
## CONSERVATIVE  0.15499199  0.08734288
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7712 0.2288

fit$scaling

##                      LD1         LD2
## OUTDOOR       0.09198065 -0.22501431
## SOCIAL       -0.19427415 -0.04978105
## CONSERVATIVE  0.15499199  0.08734288

table(predict(fit,newdata =dta)$class , dta$JOB)

##    
##      1  2  3
##   1 68 16  3
##   2 13 67 13
##   3  4 10 50

3 典型相關分析

dta <- read.table("interest.txt",head=T)
c <- dta[, 1:6]
o <- dta[, 7:24]
cor(o, c)# correlations

##               socdom     sociabty      stress        worry     impulsve
## carpentr -0.02655005  0.003410094  0.04521771  0.014433468  0.067667112
## forestr   0.02725324 -0.026765751 -0.06715231 -0.108437803  0.088864778
## morticin  0.11820489  0.067581761  0.12030452  0.164139591 -0.062807848
## policemn -0.16396093 -0.221249337  0.02471191  0.001534416  0.213318537
## fireman  -0.11011277 -0.160250335 -0.05833266 -0.096177857  0.171354498
## salesrep  0.10228283  0.114099861  0.12193224  0.122921806  0.058451390
## teacher   0.25800749  0.222603993  0.05881236  0.073260269 -0.079642912
## busexec   0.16692048  0.085727002  0.07554396  0.056706851 -0.040908120
## stockbrk  0.11213631  0.083486232  0.17100579  0.177674459 -0.044264704
## artist    0.07297586  0.046654544  0.08993349 -0.015093768 -0.088625281
## socworkr  0.20441372  0.235231262  0.07171255  0.087162295 -0.122426518
## truckdvr -0.09728773 -0.060229887 -0.14436082 -0.106540263  0.010379224
## doctor    0.17168029  0.181821452  0.07206254  0.102503492 -0.102827790
## clergymn  0.17872165  0.109124259  0.05161889  0.025933920  0.003002856
## actor     0.16935331  0.194134147  0.06281278  0.017834146  0.016812388
## lawyer    0.20960718  0.216391203  0.01961752  0.125692614 -0.113000711
## archtct   0.16948381  0.089977107  0.08594450  0.067541793 -0.023636384
## landscpr  0.06590459  0.059822828 -0.06899785 -0.098504846  0.104378146
##              thrillsk
## carpentr  0.065088780
## forestr   0.023975205
## morticin  0.002726423
## policemn  0.168304673
## fireman   0.137353641
## salesrep  0.037179601
## teacher  -0.062436080
## busexec  -0.036766020
## stockbrk  0.012823461
## artist   -0.044848256
## socworkr -0.055867851
## truckdvr -0.039882487
## doctor   -0.084253405
## clergymn -0.054379205
## actor    -0.020527488
## lawyer   -0.011759623
## archtct  -0.040691105
## landscpr  0.009815659

#can corr analysis
require(candisc)

## Loading required package: candisc

## Loading required package: car

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## Loading required package: heplots

## 
## Attaching package: 'candisc'

## The following object is masked from 'package:stats':
## 
##     cancor

cc1 <- cancor(c, o, set.names=c("c", "o"))
cc1

## 
## Canonical correlation analysis of:
##   6   c  variables:  socdom, sociabty, stress, worry, impulsve, thrillsk 
##   with    18   o  variables:  carpentr, forestr, morticin, policemn, fireman, salesrep, teacher, busexec, stockbrk, artist, socworkr, truckdvr, doctor, clergymn, actor, lawyer, archtct, landscpr 
## 
##     CanR  CanRSQ   Eigen percent    cum                          scree
## 1 0.4542 0.20627 0.25988  38.235  38.23 ******************************
## 2 0.4094 0.16759 0.20134  29.622  67.86 ***********************       
## 3 0.3084 0.09511 0.10510  15.464  83.32 ************                  
## 4 0.2321 0.05386 0.05692   8.375  91.69 *******                       
## 5 0.1875 0.03517 0.03645   5.362  97.06 ****                          
## 6 0.1400 0.01961 0.02000   2.943 100.00 **                            
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##      CanR LR test stat approx F numDF   denDF  Pr(> F)   
## 1 0.45417      0.53507  1.39033   108 1302.40 0.006627 **
## 2 0.40938      0.67413  1.10168    85 1101.77 0.254085   
## 3 0.30839      0.80985  0.77392    64  894.86 0.902452   
## 4 0.23207      0.89497  0.57602    45  681.08 0.988553   
## 5 0.18752      0.94591  0.46315    28  460.00 0.992216   
## 6 0.14004      0.98039  0.35544    13  231.00 0.981521   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#standarized coefficients 典型相關分析係數
coef(cc1, type="both", standardize=TRUE)

## [[1]]
##                Xcan1      Xcan2      Xcan3       Xcan4      Xcan5
## socdom    0.11616690 0.48545341 -0.8121924  0.05953635 -0.7578881
## sociabty  0.71734183 0.07976508  0.4628032  0.25208246  0.8599887
## stress   -0.13956999 0.63867240  0.1352582 -0.76161225  0.3885810
## worry     0.10403708 0.22125442  0.4957385  0.44817133 -0.5423593
## impulsve -0.59711733 0.33124032 -0.4448030  0.34227633  0.5224877
## thrillsk -0.06965451 0.12818479  0.5868898  0.40864768 -0.3548656
##                Xcan6
## socdom    0.19095155
## sociabty -0.02152472
## stress    0.34871493
## worry    -0.70675916
## impulsve -0.54357133
## thrillsk  0.83382010
## 
## [[2]]
##                Ycan1       Ycan2       Ycan3       Ycan4       Ycan5
## carpentr -0.03806223  0.29761130  0.39269181  0.07820669  0.06142642
## forestr  -0.07556844  0.04333148 -0.26651722 -0.01855674 -0.19668223
## morticin -0.24032070  0.18295837 -0.01901644  0.26527614 -0.64687187
## policemn -0.56831667  0.46421926  0.28136104  0.17751664 -0.29508643
## fireman  -0.24241914 -0.08230669 -0.28692920  0.31911172  0.18512522
## salesrep -0.13744029  0.45610975  0.13311642  0.05692872  0.46679305
## teacher  -0.03474957  0.51116082 -0.41721057  0.38570572 -0.46953964
## busexec   0.10739717  0.05745125 -0.52836667 -0.23890231  0.01124895
## stockbrk -0.15964197  0.57375621  0.39819057 -0.22352599  0.26705045
## artist    0.08401221 -0.03573449  0.05099942 -0.60250043 -0.04821040
## socworkr  0.08471829  0.07997559  0.59593797 -0.09694941  0.44193633
## truckdvr  0.11120624 -0.38004952 -0.04299763  0.17905453  0.01350615
## doctor    0.27240246 -0.16754528 -0.11016365 -0.48830654  0.28847059
## clergymn -0.29287033  0.18798523 -0.51626591 -0.16553149 -0.13426428
## actor     0.24969449  0.27434960  0.02201902  0.32342029  0.49285548
## lawyer    0.70432532 -0.55721785  0.26288947  0.96781654 -0.52870640
## archtct  -0.23377301  0.11984314 -0.34992175 -0.24916190 -0.28873903
## landscpr  0.08964156  0.24736280 -0.32860990  0.37333598  0.29885505
##                 Ycan6
## carpentr  0.269373775
## forestr   0.377868087
## morticin  0.003369288
## policemn -0.155457465
## fireman   0.601112645
## salesrep -0.273051757
## teacher  -0.197784848
## busexec   0.134238755
## stockbrk -0.066826540
## artist    0.567653044
## socworkr  0.759324750
## truckdvr -0.353754478
## doctor   -0.486546731
## clergymn -0.320914003
## actor    -0.023369926
## lawyer    0.296090572
## archtct  -0.149400882
## landscpr  0.003395754

redundancy(cc1)

## 
## Redundancies for the c variables & total X canonical redundancy
## 
##     Xcan1     Xcan2     Xcan3     Xcan4     Xcan5     Xcan6 total X|Y 
##  0.045509  0.036643  0.013075  0.010828  0.003268  0.002535  0.111857 
## 
## Redundancies for the o variables & total Y canonical redundancy
## 
##     Ycan1     Ycan2     Ycan3     Ycan4     Ycan5     Ycan6 total Y|X 
## 0.0274356 0.0158775 0.0043984 0.0020075 0.0027820 0.0006899 0.0531909

plot(cc1, smooth=TRUE)

4 manova

dta <- read.table(file = "nmtwins.txt", header = TRUE)
str(dta)

## 'data.frame':    839 obs. of  6 variables:
##  $ sex    : int  2 2 2 1 1 2 1 1 2 1 ...
##  $ english: int  14 20 11 9 15 20 26 28 24 19 ...
##  $ math   : int  13 20 8 19 23 17 20 31 25 19 ...
##  $ socsci : int  17 16 15 7 23 16 27 31 17 17 ...
##  $ natsci : int  18 16 16 10 21 12 23 29 21 22 ...
##  $ vocab  : int  14 13 12 6 21 12 28 30 22 16 ...

#以MANOVA，檢驗英文、數學、社會科學、自然科學與字彙成績，是否在性別上有差異。
#MANOVA
fit <- manova(cbind(english,math,socsci,natsci,vocab) ~ sex ,data=dta)
summary(fit, test="Wilks")

##            Df   Wilks approx F num Df den Df    Pr(>F)    
## sex         1 0.76565   50.993      5    833 < 2.2e-16 ***
## Residuals 837                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#檢測每一變項
summary.aov(fit)

##  Response english :
##              Df  Sum Sq Mean Sq F value   Pr(>F)   
## sex           1   230.7 230.727  10.185 0.001469 **
## Residuals   837 18961.0  22.654                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response math :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## sex           1  3154.2 3154.21  87.512 < 2.2e-16 ***
## Residuals   837 30168.2   36.04                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response socsci :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## sex           1   268.9 268.869  11.461 0.0007438 ***
## Residuals   837 19635.6  23.459                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response natsci :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## sex           1  1429.8 1429.81  44.811 3.974e-11 ***
## Residuals   837 26706.5   31.91                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response vocab :
##              Df Sum Sq Mean Sq F value Pr(>F)
## sex           1      4  3.9581  0.1706 0.6797
## Residuals   837  19424 23.2064

#Post hoc : Step down analysis
#Bonferroni adjustment of alpha.
#alpha = .05 / 5 =0.01
#排序為：math、natsci、english、socsci、vocab
summary(fit1 <- lm(math ~ sex ,data=dta))

## 
## Call:
## lm(formula = math ~ sex, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.1909  -4.1909  -0.1909   4.7398  15.7398 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  27.1215     0.6961  38.960   <2e-16 ***
## sex          -3.9306     0.4202  -9.355   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.004 on 837 degrees of freedom
## Multiple R-squared:  0.09466,    Adjusted R-squared:  0.09358 
## F-statistic: 87.51 on 1 and 837 DF,  p-value: < 2.2e-16

summary(fit1 <- lm(natsci ~ math+sex ,data=dta))

## 
## Call:
## lm(formula = natsci ~ math + sex, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.039  -2.798   0.526   3.102  11.526 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.22638    0.86461   9.515   <2e-16 ***
## math         0.58105    0.02559  22.703   <2e-16 ***
## sex         -0.36250    0.32698  -1.109    0.268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.445 on 836 degrees of freedom
## Multiple R-squared:  0.4128, Adjusted R-squared:  0.4114 
## F-statistic: 293.9 on 2 and 836 DF,  p-value: < 2.2e-16

summary(fit1 <- lm(english ~ natsci+math+sex ,data=dta))

## 
## Call:
## lm(formula = english ~ natsci + math + sex, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1069  -2.3353   0.1648   2.3502   9.7950 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.12491    0.71445   2.974  0.00302 ** 
## natsci       0.36274    0.02715  13.362  < 2e-16 ***
## math         0.25770    0.02554  10.089  < 2e-16 ***
## sex          3.03594    0.25685  11.820  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.489 on 835 degrees of freedom
## Multiple R-squared:  0.4703, Adjusted R-squared:  0.4684 
## F-statistic: 247.1 on 3 and 835 DF,  p-value: < 2.2e-16

summary(fit1 <- lm( socsci~ english+natsci+math+sex ,data=dta))

## 
## Call:
## lm(formula = socsci ~ english + natsci + math + sex, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6120  -2.1759   0.0401   2.1663  10.7497 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.57021    0.67637   8.236 6.88e-16 ***
## english      0.36860    0.03259  11.310  < 2e-16 ***
## natsci       0.29312    0.02817  10.407  < 2e-16 ***
## math         0.11179    0.02548   4.388 1.29e-05 ***
## sex         -0.32434    0.26133  -1.241    0.215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.286 on 834 degrees of freedom
## Multiple R-squared:  0.5476, Adjusted R-squared:  0.5454 
## F-statistic: 252.4 on 4 and 834 DF,  p-value: < 2.2e-16

summary(fit1 <- lm(vocab ~ socsci+english+natsci+math+sex ,data=dta))

## 
## Call:
## lm(formula = vocab ~ socsci + english + natsci + math + sex, 
##     data = dta)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.559 -1.709 -0.008  1.862  8.169 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.96816    0.60786   3.238  0.00125 ** 
## socsci       0.53030    0.02993  17.720  < 2e-16 ***
## english      0.22949    0.03025   7.587 8.76e-14 ***
## natsci       0.06119    0.02587   2.365  0.01826 *  
## math         0.06417    0.02227   2.881  0.00406 ** 
## sex          0.63953    0.22606   2.829  0.00478 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.84 on 833 degrees of freedom
## Multiple R-squared:  0.6542, Adjusted R-squared:  0.6521 
## F-statistic: 315.2 on 5 and 833 DF,  p-value: < 2.2e-16

efa & manova

Li

2018年1月19日

6 因素分析與轉軸

5 區辨函數

3 典型相關分析

4 manova