#資料來自於 TIMSS 2015 年台灣資料
#讀檔案 女生=1, 男生=2
dta <- read.table(file = "TIMSS2015.dat", header = TRUE)


##以MANOVA,檢驗數學、化學、地科、生物與物理,是否在性別上有差異。請報告檢定值。
#MANOVA
fit <- manova(cbind(MATH,CHEM,EARTH,BIO,PHY) ~ GENDER,data=dta)
summary(fit, test="Wilks")
##             Df   Wilks approx F num Df den Df    Pr(>F)    
## GENDER       1 0.88565   147.32      5   5705 < 2.2e-16 ***
## Residuals 5709                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#檢測每一變項(其實結果跟一開始做的fit1 <- lm(MATH ~ GENDER ,data=dta)一樣)
summary.aov(fit) 
##  Response MATH :
##               Df   Sum Sq Mean Sq F value Pr(>F)
## GENDER         1    10243 10243.2  1.1004 0.2942
## Residuals   5709 53143349  9308.7               
## 
##  Response CHEM :
##               Df   Sum Sq Mean Sq F value    Pr(>F)    
## GENDER         1   152283  152283   16.09 6.117e-05 ***
## Residuals   5709 54032597    9464                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response EARTH :
##               Df   Sum Sq Mean Sq F value    Pr(>F)    
## GENDER         1   197714  197714  28.881 8.001e-08 ***
## Residuals   5709 39082465    6846                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response BIO :
##               Df   Sum Sq Mean Sq F value Pr(>F)
## GENDER         1      706   705.7  0.1006 0.7512
## Residuals   5709 40060364  7017.1               
## 
##  Response PHY :
##               Df   Sum Sq Mean Sq F value    Pr(>F)    
## GENDER         1   346444  346444  42.549 7.487e-11 ***
## Residuals   5709 46484019    8142                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##最簡單的事後檢定是針對各變項做ANOVA,請針對五個變項分別做ANOVA,請報告檢定值以及效果量(η2 = SSeffect/SStotal)。
#Post hoc 1: ANOVA
#Bonferroni adjustment of alpha.
#alpha = .05 / #dv
#ANOVA
fit1 <- lm(MATH ~ GENDER ,data=dta)
summary(fit1)
## 
## Call:
## lm(formula = MATH ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -349.81  -59.95   12.93   69.91  254.12 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  597.037      4.063 146.933   <2e-16 ***
## GENDER         2.679      2.554   1.049    0.294    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.48 on 5709 degrees of freedom
## Multiple R-squared:  0.0001927,  Adjusted R-squared:  1.758e-05 
## F-statistic:   1.1 on 1 and 5709 DF,  p-value: 0.2942
fit2 <- lm(CHEM~ GENDER ,data=dta)
summary(fit2)
## 
## Call:
## lm(formula = CHEM ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -437.75  -60.22    7.80   70.50  249.07 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  596.159      4.097 145.505  < 2e-16 ***
## GENDER       -10.330      2.575  -4.011 6.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 97.29 on 5709 degrees of freedom
## Multiple R-squared:  0.00281,    Adjusted R-squared:  0.002636 
## F-statistic: 16.09 on 1 and 5709 DF,  p-value: 6.117e-05
fit3 <- lm(EARTH ~ GENDER ,data=dta)
summary(fit3)
## 
## Call:
## lm(formula = EARTH ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -411.58  -49.99    7.63   58.39  264.89 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  564.539      3.485 162.012   <2e-16 ***
## GENDER        11.770      2.190   5.374    8e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 82.74 on 5709 degrees of freedom
## Multiple R-squared:  0.005033,   Adjusted R-squared:  0.004859 
## F-statistic: 28.88 on 1 and 5709 DF,  p-value: 8.001e-08
fit4 <- lm(BIO ~ GENDER ,data=dta)
summary(fit4)
## 
## Call:
## lm(formula = BIO ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -353.03  -50.57    7.92   58.63  244.29 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 568.6311     3.5279 161.182   <2e-16 ***
## GENDER       -0.7032     2.2174  -0.317    0.751    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 83.77 on 5709 degrees of freedom
## Multiple R-squared:  1.762e-05,  Adjusted R-squared:  -0.0001575 
## F-statistic: 0.1006 on 1 and 5709 DF,  p-value: 0.7512
fit5 <- lm(PHY ~ GENDER ,data=dta)
summary(fit5)
## 
## Call:
## lm(formula = PHY ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -411.15  -59.12    6.09   65.47  270.93 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  539.958      3.800 142.086  < 2e-16 ***
## GENDER        15.581      2.389   6.523 7.49e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90.23 on 5709 degrees of freedom
## Multiple R-squared:  0.007398,   Adjusted R-squared:  0.007224 
## F-statistic: 42.55 on 1 and 5709 DF,  p-value: 7.487e-11
##第二種事後檢定是step-down analysis。請依照五個變項ANOVA的效果量大小,進行 step-down analysis。
#Post hoc 2: Step down analysis
#Bonferroni adjustment of alpha.
#alpha = .05 / #dv
#效果量排序由大到小為:PHY、EARTH、CHEM、MATH、BIO
fit1 <- lm(PHY ~ GENDER ,data=dta)
summary(fit1)
## 
## Call:
## lm(formula = PHY ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -411.15  -59.12    6.09   65.47  270.93 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  539.958      3.800 142.086  < 2e-16 ***
## GENDER        15.581      2.389   6.523 7.49e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90.23 on 5709 degrees of freedom
## Multiple R-squared:  0.007398,   Adjusted R-squared:  0.007224 
## F-statistic: 42.55 on 1 and 5709 DF,  p-value: 7.487e-11
fit2 <- lm(EARTH~ GENDER+PHY,data=dta)
summary(fit2)
## 
## Call:
## lm(formula = EARTH ~ GENDER + PHY, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -187.844  -23.959    0.856   24.909  176.714 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 124.319973   3.396683  36.600   <2e-16 ***
## GENDER       -0.932333   1.006122  -0.927    0.354    
## PHY           0.815284   0.005554 146.788   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.87 on 5708 degrees of freedom
## Multiple R-squared:  0.7916, Adjusted R-squared:  0.7915 
## F-statistic: 1.084e+04 on 2 and 5708 DF,  p-value: < 2.2e-16
fit3 <- lm(CHEM~ GENDER+PHY+EARTH,data=dta)
summary(fit3)
## 
## Call:
## lm(formula = CHEM ~ GENDER + PHY + EARTH, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -172.468  -25.254    0.364   25.511  151.231 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.52018    3.75218   2.271   0.0232 *  
## GENDER      -24.98096    1.00031 -24.973   <2e-16 ***
## PHY           0.55501    0.01207  45.999   <2e-16 ***
## EARTH         0.51008    0.01316  38.764   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.65 on 5707 degrees of freedom
## Multiple R-squared:  0.8507, Adjusted R-squared:  0.8507 
## F-statistic: 1.084e+04 on 3 and 5707 DF,  p-value: < 2.2e-16
fit4 <- lm(MATH~ GENDER+PHY+EARTH+CHEM,data=dta)
summary(fit4)
## 
## Call:
## lm(formula = MATH ~ GENDER + PHY + EARTH + CHEM, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -242.054  -38.448    1.361   37.761  224.405 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 109.39161    5.78611  18.906   <2e-16 ***
## GENDER        1.90379    1.62391   1.172   0.2411    
## PHY           0.29322    0.02177  13.467   <2e-16 ***
## EARTH         0.08875    0.02280   3.893   0.0001 ***
## CHEM          0.46835    0.02040  22.954   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 58.03 on 5706 degrees of freedom
## Multiple R-squared:  0.6385, Adjusted R-squared:  0.6383 
## F-statistic:  2520 on 4 and 5706 DF,  p-value: < 2.2e-16
fit5 <- lm(BIO~ GENDER+PHY+EARTH+CHEM+MATH,data=dta)
summary(fit5)
## 
## Call:
## lm(formula = BIO ~ GENDER + PHY + EARTH + CHEM + MATH, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -126.709  -21.956    0.002   21.425  132.474 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.211065   3.215572  12.194  < 2e-16 ***
## GENDER      -6.597878   0.875572  -7.536 5.63e-14 ***
## PHY          0.128840   0.011924  10.805  < 2e-16 ***
## EARTH        0.482894   0.012306  39.240  < 2e-16 ***
## CHEM         0.202747   0.011496  17.636  < 2e-16 ***
## MATH         0.111165   0.007137  15.576  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.28 on 5705 degrees of freedom
## Multiple R-squared:  0.8606, Adjusted R-squared:  0.8605 
## F-statistic:  7046 on 5 and 5705 DF,  p-value: < 2.2e-16
##依照理論,決定step-down analysis順序。
#順序:物理、化學、數學、地科、生物
fit1 <- lm(PHY ~ GENDER ,data=dta)
summary(fit1)
## 
## Call:
## lm(formula = PHY ~ GENDER, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -411.15  -59.12    6.09   65.47  270.93 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  539.958      3.800 142.086  < 2e-16 ***
## GENDER        15.581      2.389   6.523 7.49e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90.23 on 5709 degrees of freedom
## Multiple R-squared:  0.007398,   Adjusted R-squared:  0.007224 
## F-statistic: 42.55 on 1 and 5709 DF,  p-value: 7.487e-11
fit2 <- lm(CHEM~ GENDER+PHY,data=dta)
summary(fit2)
## 
## Call:
## lm(formula = CHEM ~ GENDER + PHY, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -192.633  -27.502    0.147   28.402  207.271 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  71.933080   3.795076   18.95   <2e-16 ***
## GENDER      -25.456525   1.124129  -22.65   <2e-16 ***
## PHY           0.970865   0.006206  156.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42.31 on 5708 degrees of freedom
## Multiple R-squared:  0.8114, Adjusted R-squared:  0.8114 
## F-statistic: 1.228e+04 on 2 and 5708 DF,  p-value: < 2.2e-16
fit3 <- lm(MATH~ GENDER+PHY+CHEM,data=dta)
summary(fit3)
## 
## Call:
## lm(formula = MATH ~ GENDER + PHY + CHEM, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -243.429  -38.384    1.558   37.813  223.766 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 117.81682    5.37285  21.928   <2e-16 ***
## GENDER        2.74424    1.61149   1.703   0.0886 .  
## PHY           0.33038    0.01960  16.859   <2e-16 ***
## CHEM          0.50462    0.01818  27.763   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 58.1 on 5707 degrees of freedom
## Multiple R-squared:  0.6376, Adjusted R-squared:  0.6374 
## F-statistic:  3347 on 3 and 5707 DF,  p-value: < 2.2e-16
fit4 <- lm(EARTH~ GENDER+PHY+CHEM+MATH,data=dta)
summary(fit4)
## 
## Call:
## lm(formula = EARTH ~ GENDER + PHY + CHEM + MATH, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -157.406  -22.387   -0.108   22.429  129.822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 91.410599   3.240577  28.208   <2e-16 ***
## GENDER       9.387460   0.933663  10.054   <2e-16 ***
## PHY          0.408720   0.011630  35.144   <2e-16 ***
## CHEM         0.393543   0.011216  35.087   <2e-16 ***
## MATH         0.029852   0.007667   3.893    1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.65 on 5706 degrees of freedom
## Multiple R-squared:  0.8355, Adjusted R-squared:  0.8354 
## F-statistic:  7245 on 4 and 5706 DF,  p-value: < 2.2e-16
fit5 <- lm(BIO~ GENDER+PHY+CHEM+MATH+EARTH,data=dta)
summary(fit5)
## 
## Call:
## lm(formula = BIO ~ GENDER + PHY + CHEM + MATH + EARTH, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -126.709  -21.956    0.002   21.425  132.474 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.211065   3.215572  12.194  < 2e-16 ***
## GENDER      -6.597878   0.875572  -7.536 5.63e-14 ***
## PHY          0.128840   0.011924  10.805  < 2e-16 ***
## CHEM         0.202747   0.011496  17.636  < 2e-16 ***
## MATH         0.111165   0.007137  15.576  < 2e-16 ***
## EARTH        0.482894   0.012306  39.240  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.28 on 5705 degrees of freedom
## Multiple R-squared:  0.8606, Adjusted R-squared:  0.8605 
## F-statistic:  7046 on 5 and 5705 DF,  p-value: < 2.2e-16
##區辨分析
#Post hoc 3: discriminant analysis

dta <- read.table(file = "Iaddiction.txt", header = TRUE)
#將資料分兩半
dta$IA<-as.factor(dta$IA)
dta$gender<-as.factor(dta$gender)
sid <- sample(1:2000,1000)
dta1 <- dta[sid,]
dta2 <- dta[-sid,]
dim(dta)
## [1] 2000    9
dim(dta1)
## [1] 1000    9
dim(dta2)
## [1] 1000    9
#自尊與同儕關係的散佈圖,並以顏色標示是否網路成癮
require(ggplot2)
## Loading required package: ggplot2
ggplot(dta1, aes(x =esteem, y=peer, color = IA)) +
  geom_point()+
  theme_bw()

#家庭監督與學校疏離的散佈圖,並以顏色標示是否網路成癮
ggplot(dta1, aes(x =fmon, y=sch, color = IA)) +
  geom_point()+
  theme_bw()

#自我觀感、同儕關係、學校關係與家庭關係進行區辨分析。
library(MASS)
fit <- lda(IA ~ esteem+peer+sch+fcon+fmon+ffun, data=dta1)
fit
## Call:
## lda(IA ~ esteem + peer + sch + fcon + fmon + ffun, data = dta1)
## 
## Prior probabilities of groups:
##     0     1 
## 0.821 0.179 
## 
## Group means:
##     esteem     peer      sch     fcon     fmon     ffun
## 0 29.11515 1.785830 1.762485 1.832724 1.616728 4.501827
## 1 26.28986 1.871508 1.973184 1.992551 1.856611 3.558659
## 
## Coefficients of linear discriminants:
##               LD1
## esteem -0.1249398
## peer   -0.4009488
## sch     1.4642028
## fcon    0.2376808
## fmon    0.5508499
## ffun    0.0212030
#means for each category
fit$means
##     esteem     peer      sch     fcon     fmon     ffun
## 0 29.11515 1.785830 1.762485 1.832724 1.616728 4.501827
## 1 26.28986 1.871508 1.973184 1.992551 1.856611 3.558659
#discriminant function區辨函數
fit$scaling
##               LD1
## esteem -0.1249398
## peer   -0.4009488
## sch     1.4642028
## fcon    0.2376808
## fmon    0.5508499
## ffun    0.0212030
#proportion explained for each dimensions
prop <- fit$svd^2/sum(fit$svd^2)
prop
## [1] 1
# Exploratory Graph for LDA or QDA
require(klaR)
## Loading required package: klaR
partimat(IA ~ esteem+peer+sch+fcon+fmon+ffun, data=dta2,method="lda")

#以後半資料,測試區辨函數的正確性。
table(predict(fit,newdata =dta2)$class , dta2$IA) 
##    
##       0   1
##   0 801 177
##   1  11  11
# 預測正確比率82.4%