library(tidyverse)
library(car)
概似函數 = \(\quad \prod_{i=1}^n (1-p)^{X_i-1}*p \quad\)
#3.
#資料
dat=c(2,7,6,4,4,8,1,3,5,2)
#概似函數
LogL <- function(p){
L=0
for (i in 1:10){
L=L+(dat[i]-1)*log(1-p)+log(p)
}
LogL <- L
}
#求最大值。由於 optim 預設是找最小,我們利用 fnscale=-1,將函數
#值乘 -1,這時求極小就是原函數求最大。
optim(0.5, LogL,control=list(fnscale=-1))
## $par
## [1] 0.2380859
##
## $value
## [1] -23.05272
##
## $counts
## function gradient
## 30 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
#p大概是0.24
## Q3
# 1.
rst <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
rst[i]<- mean(rnorm(9,170,15))
}
#平均數
round(mean(rst),2)
## [1] 169.89
#標準差
round(sd(rst),2)
## [1] 5.04
# 2.
quantile(rst,c(0.05,0.95))
## 5% 95%
## 161.6380 178.1676
#要樣本平均數小於162才會落入棄卻域,因此推斷三興電池使用長度與一般電池差不多
#1.
rst <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
rst[i]<- min(rnorm(9,170,15))
}
#平均數
round(mean(rst),2)
## [1] 147.78
#標準差
round(sd(rst),2)
## [1] 9.09
#2.
quantile(rst,c(0.05,0.95))
## 5% 95%
## 131.8827 161.7528
#樣本最小值小於132會落入棄卻域,因此推斷三興電池使用長度低於一般電池
#1.
rst1 <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
rst1[i]<- mean(rnorm(20))
}
#平均數
round(mean(rst1),2)
## [1] 0
#標準差
round(sd(rst1),2)
## [1] 0.22
#2.
rst <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
rst[i]<- mean(rnorm(20,0.5))
}
#平均數
round(mean(rst),2)
## [1] 0.5
#標準差
round(sd(rst),2)
## [1] 0.22
#3.
quantile(rst1,0.95)
## 95%
## 0.3737174
#power
round(1-pnorm(0.37,0.5,0.22),2)
## [1] 0.72
#假設最後研究結果不顯著時,在下結論說心理治療沒有效果時需要小心有大約28%的機率犯型二錯誤,如果希望型二錯誤率下降的話需要增加樣本數
#記得要打header = T,這個意思是告訴她有標題(column names)
dta6 <- read.table("C:/Users/Cheng_wen_sung/Desktop/10Exam/TIMSSJPE.txt",header = T)
head(dta6)#看資料長怎樣
## area gender paedu math chem earth bio phy
## 1 JPN boy SenH 612.1766 549.1075 591.7856 592.7483 624.9121
## 2 JPN boy college 642.7959 586.6755 597.2935 568.5998 658.1473
## 3 JPN boy University 592.0010 552.1962 556.6705 590.9761 567.7777
## 4 JPN boy SenH 554.9173 626.1825 641.9195 578.9409 529.3375
## 5 JPN boy University 689.4872 552.3314 588.1472 593.3991 620.5972
## 6 JPN boy University 668.0994 648.7785 637.1797 646.7870 645.4456
## computer desk book room internet numberbooks
## 1 YES YES YES YES YES 101-200
## 2 YES YES YES YES YES 101-200
## 3 YES YES YES NO YES 101-200
## 4 NO YES YES YES NO 101-200
## 5 YES YES YES YES YES 11-25
## 6 YES YES YES YES YES 26-100
#1.
##我偷懶用其他函數算這題,用老師的方法大概是這樣
#tapply(dta6$math,dta6$paedu,mean)
dta6 %>% group_by(paedu)%>% summarise(mean_math = mean(math))
## # A tibble: 5 × 2
## paedu mean_math
## <fctr> <dbl>
## 1 college 569.1348
## 2 elementary 392.3737
## 3 JunH 488.0429
## 4 SenH 549.3197
## 5 University 601.4606
#2.
##這一段是我在做dummy coding,我用了其他函數做這件事
#
#
##不會的人可以參考老師的,大概會像這樣
#
#
#dummy coding
#D1= (dat6$paedu=="college")
#D2= (dat6$paedu=="elementary")
#D3= (dat6$paedu=="JunH")
#D4= (dat6$paedu=="SenH")
#dta6=data.frame(dta6,D1,D2,D3,D4)
dta6 <- dta6 %>% mutate(D1 = paedu == "college",
D2 = paedu == "elementary",
D3 = paedu == "JunH",
D4 = paedu == "SenH")
#One way ANOVA
summary(r6 <- lm(math ~ D1+D2+D3+D4, data = dta6))
##
## Call:
## lm(formula = math ~ D1 + D2 + D3 + D4, data = dta6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -315.825 -48.200 2.836 55.064 256.915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 601.461 2.087 288.167 < 2e-16 ***
## D1TRUE -32.326 3.521 -9.181 < 2e-16 ***
## D2TRUE -209.087 27.915 -7.490 8.71e-14 ***
## D3TRUE -113.418 9.156 -12.387 < 2e-16 ***
## D4TRUE -52.141 3.130 -16.661 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 78.73 on 3415 degrees of freedom
## Multiple R-squared: 0.1103, Adjusted R-squared: 0.1093
## F-statistic: 105.9 on 4 and 3415 DF, p-value: < 2.2e-16
#家長教育程度會影響數學成績,截距代表教育程度為大學以上時學生的數學成績,迴歸式的係數代表當教育程度為大學以外的某一項時,會影響數學成績的程度,例如假設教育程度是專科時,數學成績會是601.46-32.33=569.13
#1.
dta7 <- read.csv("C:/Users/Cheng_wen_sung/Desktop/10Exam/Happiness.csv",header = T)
head(dta7)#看資料長怎樣
## happiness age gender friends pets
## 1 5 24 Male 12 3
## 2 5 28 Male 8 1
## 3 6 25 Female 6 0
## 4 4 26 Male 4 2
## 5 3 20 Female 8 0
## 6 5 25 Male 9 0
##happiness對pets做迴歸
summary(r7_1 <- lm(happiness ~ pets, data = dta7))
##
## Call:
## lm(formula = happiness ~ pets, data = dta7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4203 -1.0232 -0.0232 0.9768 4.1826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.0232 0.2053 19.601 < 2e-16 ***
## pets 0.3971 0.1278 3.107 0.00247 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.496 on 98 degrees of freedom
## Multiple R-squared: 0.08969, Adjusted R-squared: 0.0804
## F-statistic: 9.656 on 1 and 98 DF, p-value: 0.00247
#寵物數量迴歸係數為0.4,顯著
#2.
##多加一個變項
summary(r7_2 <- lm(happiness ~ pets+friends, data = dta7))
##
## Call:
## lm(formula = happiness ~ pets + friends, data = dta7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0884 -1.0050 -0.0642 0.8753 3.6936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.87045 0.41549 6.909 5.14e-10 ***
## pets 0.35074 0.12322 2.846 0.00540 **
## friends 0.17345 0.05508 3.149 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.432 on 97 degrees of freedom
## Multiple R-squared: 0.1741, Adjusted R-squared: 0.1571
## F-statistic: 10.22 on 2 and 97 DF, p-value: 9.345e-05
#寵物數量迴歸係數為0.35,顯著
#model comparison(多做的)
anova(r7_1,r7_2)
## Analysis of Variance Table
##
## Model 1: happiness ~ pets
## Model 2: happiness ~ pets + friends
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 98 219.24
## 2 97 198.91 1 20.333 9.9156 0.002178 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dta8 <- read.table("C:/Users/Cheng_wen_sung/Desktop/10Exam/TIMSSJPE.txt",header = T)
#1.
##迴歸
summary(r8 <- lm(phy~ math+computer+math*computer,data = dta8))
##
## Call:
## lm(formula = phy ~ math + computer + math * computer, data = dta8)
##
## Residuals:
## Min 1Q Median 3Q Max
## -255.659 -36.637 0.507 37.622 201.376
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 115.73011 20.72151 5.585 2.52e-08 ***
## math 0.76778 0.03902 19.679 < 2e-16 ***
## computerYES 26.33508 21.98720 1.198 0.231
## math:computerYES -0.03208 0.04099 -0.783 0.434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.36 on 3416 degrees of freedom
## Multiple R-squared: 0.5406, Adjusted R-squared: 0.5402
## F-statistic: 1340 on 3 and 3416 DF, p-value: < 2.2e-16
#交互作用係數= -0.03,不顯著
#Type III ANOVA
Anova(lm(phy~ math+computer+math*computer,data = dta8, contrasts=list(topic=contr.sum, sys=contr.sum)), type=3)
## Anova Table (Type III tests)
##
## Response: phy
## Sum Sq Df F value Pr(>F)
## (Intercept) 102627 1 31.1925 2.519e-08 ***
## math 1274107 1 387.2537 < 2.2e-16 ***
## computer 4720 1 1.4346 0.2311
## math:computer 2015 1 0.6123 0.4340
## Residuals 11239016 3416
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#2. 從回歸式來看,數學對於物理成績有顯著的影響,但是電腦的有無對於物理成績並沒有顯著影響,而且有電腦也不會讓數學對物理成績的影響更大
#3.
##畫交互作用圖
ggplot(data = dta8, aes(x = math,y = phy,group = computer,color = computer))+
stat_smooth(method = "lm",se = FALSE)+
theme_bw()