#資料來自於 TIMSS 2015 年台灣資料
#讀檔案 女生=1, 男生=2
dta <- read.table(file = "TIMSS2015.dat", header = TRUE)
##以MANOVA,檢驗數學、化學、地科、生物與物理,是否在性別上有差異。請報告檢定值。
#MANOVA
fit <- manova(cbind(MATH,CHEM,EARTH,BIO,PHY) ~ GENDER,data=dta)
summary(fit, test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## GENDER 1 0.88565 147.32 5 5705 < 2.2e-16 ***
## Residuals 5709
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#檢測每一變項(其實結果跟一開始做的fit1 <- lm(MATH ~ GENDER ,data=dta)一樣)
summary.aov(fit)
## Response MATH :
## Df Sum Sq Mean Sq F value Pr(>F)
## GENDER 1 10243 10243.2 1.1004 0.2942
## Residuals 5709 53143349 9308.7
##
## Response CHEM :
## Df Sum Sq Mean Sq F value Pr(>F)
## GENDER 1 152283 152283 16.09 6.117e-05 ***
## Residuals 5709 54032597 9464
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response EARTH :
## Df Sum Sq Mean Sq F value Pr(>F)
## GENDER 1 197714 197714 28.881 8.001e-08 ***
## Residuals 5709 39082465 6846
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response BIO :
## Df Sum Sq Mean Sq F value Pr(>F)
## GENDER 1 706 705.7 0.1006 0.7512
## Residuals 5709 40060364 7017.1
##
## Response PHY :
## Df Sum Sq Mean Sq F value Pr(>F)
## GENDER 1 346444 346444 42.549 7.487e-11 ***
## Residuals 5709 46484019 8142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##最簡單的事後檢定是針對各變項做ANOVA,請針對五個變項分別做ANOVA,請報告檢定值以及效果量(η2 = SSeffect/SStotal)。
#Post hoc 1: ANOVA
#Bonferroni adjustment of alpha.
#alpha = .05 / #dv
#ANOVA
fit1 <- lm(MATH ~ GENDER ,data=dta)
summary(fit1)
##
## Call:
## lm(formula = MATH ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -349.81 -59.95 12.93 69.91 254.12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 597.037 4.063 146.933 <2e-16 ***
## GENDER 2.679 2.554 1.049 0.294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 96.48 on 5709 degrees of freedom
## Multiple R-squared: 0.0001927, Adjusted R-squared: 1.758e-05
## F-statistic: 1.1 on 1 and 5709 DF, p-value: 0.2942
fit2 <- lm(CHEM~ GENDER ,data=dta)
summary(fit2)
##
## Call:
## lm(formula = CHEM ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -437.75 -60.22 7.80 70.50 249.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 596.159 4.097 145.505 < 2e-16 ***
## GENDER -10.330 2.575 -4.011 6.12e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 97.29 on 5709 degrees of freedom
## Multiple R-squared: 0.00281, Adjusted R-squared: 0.002636
## F-statistic: 16.09 on 1 and 5709 DF, p-value: 6.117e-05
fit3 <- lm(EARTH ~ GENDER ,data=dta)
summary(fit3)
##
## Call:
## lm(formula = EARTH ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -411.58 -49.99 7.63 58.39 264.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 564.539 3.485 162.012 <2e-16 ***
## GENDER 11.770 2.190 5.374 8e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 82.74 on 5709 degrees of freedom
## Multiple R-squared: 0.005033, Adjusted R-squared: 0.004859
## F-statistic: 28.88 on 1 and 5709 DF, p-value: 8.001e-08
fit4 <- lm(BIO ~ GENDER ,data=dta)
summary(fit4)
##
## Call:
## lm(formula = BIO ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -353.03 -50.57 7.92 58.63 244.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 568.6311 3.5279 161.182 <2e-16 ***
## GENDER -0.7032 2.2174 -0.317 0.751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 83.77 on 5709 degrees of freedom
## Multiple R-squared: 1.762e-05, Adjusted R-squared: -0.0001575
## F-statistic: 0.1006 on 1 and 5709 DF, p-value: 0.7512
fit5 <- lm(PHY ~ GENDER ,data=dta)
summary(fit5)
##
## Call:
## lm(formula = PHY ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -411.15 -59.12 6.09 65.47 270.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 539.958 3.800 142.086 < 2e-16 ***
## GENDER 15.581 2.389 6.523 7.49e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.23 on 5709 degrees of freedom
## Multiple R-squared: 0.007398, Adjusted R-squared: 0.007224
## F-statistic: 42.55 on 1 and 5709 DF, p-value: 7.487e-11
##第二種事後檢定是step-down analysis。請依照五個變項ANOVA的效果量大小,進行 step-down analysis。
#Post hoc 2: Step down analysis
#Bonferroni adjustment of alpha.
#alpha = .05 / #dv
#效果量排序由大到小為:PHY、EARTH、CHEM、MATH、BIO
fit1 <- lm(PHY ~ GENDER ,data=dta)
summary(fit1)
##
## Call:
## lm(formula = PHY ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -411.15 -59.12 6.09 65.47 270.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 539.958 3.800 142.086 < 2e-16 ***
## GENDER 15.581 2.389 6.523 7.49e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.23 on 5709 degrees of freedom
## Multiple R-squared: 0.007398, Adjusted R-squared: 0.007224
## F-statistic: 42.55 on 1 and 5709 DF, p-value: 7.487e-11
fit2 <- lm(EARTH~ GENDER+PHY,data=dta)
summary(fit2)
##
## Call:
## lm(formula = EARTH ~ GENDER + PHY, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -187.844 -23.959 0.856 24.909 176.714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 124.319973 3.396683 36.600 <2e-16 ***
## GENDER -0.932333 1.006122 -0.927 0.354
## PHY 0.815284 0.005554 146.788 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.87 on 5708 degrees of freedom
## Multiple R-squared: 0.7916, Adjusted R-squared: 0.7915
## F-statistic: 1.084e+04 on 2 and 5708 DF, p-value: < 2.2e-16
fit3 <- lm(CHEM~ GENDER+PHY+EARTH,data=dta)
summary(fit3)
##
## Call:
## lm(formula = CHEM ~ GENDER + PHY + EARTH, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -172.468 -25.254 0.364 25.511 151.231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.52018 3.75218 2.271 0.0232 *
## GENDER -24.98096 1.00031 -24.973 <2e-16 ***
## PHY 0.55501 0.01207 45.999 <2e-16 ***
## EARTH 0.51008 0.01316 38.764 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.65 on 5707 degrees of freedom
## Multiple R-squared: 0.8507, Adjusted R-squared: 0.8507
## F-statistic: 1.084e+04 on 3 and 5707 DF, p-value: < 2.2e-16
fit4 <- lm(MATH~ GENDER+PHY+EARTH+CHEM,data=dta)
summary(fit4)
##
## Call:
## lm(formula = MATH ~ GENDER + PHY + EARTH + CHEM, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -242.054 -38.448 1.361 37.761 224.405
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109.39161 5.78611 18.906 <2e-16 ***
## GENDER 1.90379 1.62391 1.172 0.2411
## PHY 0.29322 0.02177 13.467 <2e-16 ***
## EARTH 0.08875 0.02280 3.893 0.0001 ***
## CHEM 0.46835 0.02040 22.954 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 58.03 on 5706 degrees of freedom
## Multiple R-squared: 0.6385, Adjusted R-squared: 0.6383
## F-statistic: 2520 on 4 and 5706 DF, p-value: < 2.2e-16
fit5 <- lm(BIO~ GENDER+PHY+EARTH+CHEM+MATH,data=dta)
summary(fit5)
##
## Call:
## lm(formula = BIO ~ GENDER + PHY + EARTH + CHEM + MATH, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -126.709 -21.956 0.002 21.425 132.474
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.211065 3.215572 12.194 < 2e-16 ***
## GENDER -6.597878 0.875572 -7.536 5.63e-14 ***
## PHY 0.128840 0.011924 10.805 < 2e-16 ***
## EARTH 0.482894 0.012306 39.240 < 2e-16 ***
## CHEM 0.202747 0.011496 17.636 < 2e-16 ***
## MATH 0.111165 0.007137 15.576 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.28 on 5705 degrees of freedom
## Multiple R-squared: 0.8606, Adjusted R-squared: 0.8605
## F-statistic: 7046 on 5 and 5705 DF, p-value: < 2.2e-16
##依照理論,決定step-down analysis順序。
#順序:物理、化學、數學、地科、生物
fit1 <- lm(PHY ~ GENDER ,data=dta)
summary(fit1)
##
## Call:
## lm(formula = PHY ~ GENDER, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -411.15 -59.12 6.09 65.47 270.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 539.958 3.800 142.086 < 2e-16 ***
## GENDER 15.581 2.389 6.523 7.49e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.23 on 5709 degrees of freedom
## Multiple R-squared: 0.007398, Adjusted R-squared: 0.007224
## F-statistic: 42.55 on 1 and 5709 DF, p-value: 7.487e-11
fit2 <- lm(CHEM~ GENDER+PHY,data=dta)
summary(fit2)
##
## Call:
## lm(formula = CHEM ~ GENDER + PHY, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -192.633 -27.502 0.147 28.402 207.271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.933080 3.795076 18.95 <2e-16 ***
## GENDER -25.456525 1.124129 -22.65 <2e-16 ***
## PHY 0.970865 0.006206 156.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42.31 on 5708 degrees of freedom
## Multiple R-squared: 0.8114, Adjusted R-squared: 0.8114
## F-statistic: 1.228e+04 on 2 and 5708 DF, p-value: < 2.2e-16
fit3 <- lm(MATH~ GENDER+PHY+CHEM,data=dta)
summary(fit3)
##
## Call:
## lm(formula = MATH ~ GENDER + PHY + CHEM, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -243.429 -38.384 1.558 37.813 223.766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 117.81682 5.37285 21.928 <2e-16 ***
## GENDER 2.74424 1.61149 1.703 0.0886 .
## PHY 0.33038 0.01960 16.859 <2e-16 ***
## CHEM 0.50462 0.01818 27.763 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 58.1 on 5707 degrees of freedom
## Multiple R-squared: 0.6376, Adjusted R-squared: 0.6374
## F-statistic: 3347 on 3 and 5707 DF, p-value: < 2.2e-16
fit4 <- lm(EARTH~ GENDER+PHY+CHEM+MATH,data=dta)
summary(fit4)
##
## Call:
## lm(formula = EARTH ~ GENDER + PHY + CHEM + MATH, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.406 -22.387 -0.108 22.429 129.822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.410599 3.240577 28.208 <2e-16 ***
## GENDER 9.387460 0.933663 10.054 <2e-16 ***
## PHY 0.408720 0.011630 35.144 <2e-16 ***
## CHEM 0.393543 0.011216 35.087 <2e-16 ***
## MATH 0.029852 0.007667 3.893 1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.65 on 5706 degrees of freedom
## Multiple R-squared: 0.8355, Adjusted R-squared: 0.8354
## F-statistic: 7245 on 4 and 5706 DF, p-value: < 2.2e-16
fit5 <- lm(BIO~ GENDER+PHY+CHEM+MATH+EARTH,data=dta)
summary(fit5)
##
## Call:
## lm(formula = BIO ~ GENDER + PHY + CHEM + MATH + EARTH, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -126.709 -21.956 0.002 21.425 132.474
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.211065 3.215572 12.194 < 2e-16 ***
## GENDER -6.597878 0.875572 -7.536 5.63e-14 ***
## PHY 0.128840 0.011924 10.805 < 2e-16 ***
## CHEM 0.202747 0.011496 17.636 < 2e-16 ***
## MATH 0.111165 0.007137 15.576 < 2e-16 ***
## EARTH 0.482894 0.012306 39.240 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.28 on 5705 degrees of freedom
## Multiple R-squared: 0.8606, Adjusted R-squared: 0.8605
## F-statistic: 7046 on 5 and 5705 DF, p-value: < 2.2e-16
##區辨分析
#Post hoc 3: discriminant analysis
dta <- read.table(file = "Iaddiction.txt", header = TRUE)
#將資料分兩半
dta$IA<-as.factor(dta$IA)
dta$gender<-as.factor(dta$gender)
sid <- sample(1:2000,1000)
dta1 <- dta[sid,]
dta2 <- dta[-sid,]
dim(dta)
## [1] 2000 9
dim(dta1)
## [1] 1000 9
dim(dta2)
## [1] 1000 9
#自尊與同儕關係的散佈圖,並以顏色標示是否網路成癮
require(ggplot2)
## Loading required package: ggplot2
ggplot(dta1, aes(x =esteem, y=peer, color = IA)) +
geom_point()+
theme_bw()

#家庭監督與學校疏離的散佈圖,並以顏色標示是否網路成癮
ggplot(dta1, aes(x =fmon, y=sch, color = IA)) +
geom_point()+
theme_bw()

#自我觀感、同儕關係、學校關係與家庭關係進行區辨分析。
library(MASS)
fit <- lda(IA ~ esteem+peer+sch+fcon+fmon+ffun, data=dta1)
fit
## Call:
## lda(IA ~ esteem + peer + sch + fcon + fmon + ffun, data = dta1)
##
## Prior probabilities of groups:
## 0 1
## 0.821 0.179
##
## Group means:
## esteem peer sch fcon fmon ffun
## 0 29.11515 1.785830 1.762485 1.832724 1.616728 4.501827
## 1 26.28986 1.871508 1.973184 1.992551 1.856611 3.558659
##
## Coefficients of linear discriminants:
## LD1
## esteem -0.1249398
## peer -0.4009488
## sch 1.4642028
## fcon 0.2376808
## fmon 0.5508499
## ffun 0.0212030
#means for each category
fit$means
## esteem peer sch fcon fmon ffun
## 0 29.11515 1.785830 1.762485 1.832724 1.616728 4.501827
## 1 26.28986 1.871508 1.973184 1.992551 1.856611 3.558659
#discriminant function區辨函數
fit$scaling
## LD1
## esteem -0.1249398
## peer -0.4009488
## sch 1.4642028
## fcon 0.2376808
## fmon 0.5508499
## ffun 0.0212030
#proportion explained for each dimensions
prop <- fit$svd^2/sum(fit$svd^2)
prop
## [1] 1
# Exploratory Graph for LDA or QDA
require(klaR)
## Loading required package: klaR
partimat(IA ~ esteem+peer+sch+fcon+fmon+ffun, data=dta2,method="lda")


#以後半資料,測試區辨函數的正確性。
table(predict(fit,newdata =dta2)$class , dta2$IA)
##
## 0 1
## 0 801 177
## 1 11 11
# 預測正確比率82.4%