midterm

library

library(tidyverse)
library(car)

Question 1

1.

概似函數 = \(\quad \prod_{i=1}^n (1-p)^{X_i-1}*p \quad\)

#3.

#資料
dat=c(2,7,6,4,4,8,1,3,5,2)

#概似函數
LogL <- function(p){
  L=0
  for (i in 1:10){
    L=L+(dat[i]-1)*log(1-p)+log(p)
  }
  LogL <- L
}

#求最大值。由於 optim 預設是找最小，我們利用 fnscale=-1，將函數
#值乘 -1，這時求極小就是原函數求最大。
optim(0.5, LogL,control=list(fnscale=-1))

## $par
## [1] 0.2380859
## 
## $value
## [1] -23.05272
## 
## $counts
## function gradient 
##       30       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

#p大概是0.24

Question 3

## Q3
 
# 1.
rst <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
   rst[i]<- mean(rnorm(9,170,15)) 
}
#平均數
round(mean(rst),2)

## [1] 169.89

#標準差
round(sd(rst),2)

## [1] 5.04

# 2.
quantile(rst,c(0.05,0.95))

##       5%      95% 
## 161.6380 178.1676

#要樣本平均數小於162才會落入棄卻域，因此推斷三興電池使用長度與一般電池差不多

Question 4

#1.
rst <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
  rst[i]<- min(rnorm(9,170,15)) 
}
#平均數
round(mean(rst),2)

## [1] 147.78

#標準差
round(sd(rst),2)

## [1] 9.09

#2.
quantile(rst,c(0.05,0.95))

##       5%      95% 
## 131.8827 161.7528

#樣本最小值小於132會落入棄卻域，因此推斷三興電池使用長度低於一般電池

Question 5

#1.
rst1 <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
  rst1[i]<- mean(rnorm(20)) 
}
#平均數
round(mean(rst1),2)

## [1] 0

#標準差
round(sd(rst1),2)

## [1] 0.22

#2.
rst <- matrix(0,10000,1)
n = 10000
for (i in 1:n){
  rst[i]<- mean(rnorm(20,0.5)) 
}
#平均數
round(mean(rst),2)

## [1] 0.5

#標準差
round(sd(rst),2)

## [1] 0.22

#3.
quantile(rst1,0.95)

##       95% 
## 0.3737174

#power
round(1-pnorm(0.37,0.5,0.22),2)

## [1] 0.72

#假設最後研究結果不顯著時，在下結論說心理治療沒有效果時需要小心有大約28%的機率犯型二錯誤，如果希望型二錯誤率下降的話需要增加樣本數

Question 6

#記得要打header = T，這個意思是告訴她有標題(column names)
dta6 <- read.table("C:/Users/Cheng_wen_sung/Desktop/10Exam/TIMSSJPE.txt",header = T)
head(dta6)#看資料長怎樣

##   area gender      paedu     math     chem    earth      bio      phy
## 1  JPN    boy       SenH 612.1766 549.1075 591.7856 592.7483 624.9121
## 2  JPN    boy    college 642.7959 586.6755 597.2935 568.5998 658.1473
## 3  JPN    boy University 592.0010 552.1962 556.6705 590.9761 567.7777
## 4  JPN    boy       SenH 554.9173 626.1825 641.9195 578.9409 529.3375
## 5  JPN    boy University 689.4872 552.3314 588.1472 593.3991 620.5972
## 6  JPN    boy University 668.0994 648.7785 637.1797 646.7870 645.4456
##   computer desk book room internet numberbooks
## 1      YES  YES  YES  YES      YES     101-200
## 2      YES  YES  YES  YES      YES     101-200
## 3      YES  YES  YES   NO      YES     101-200
## 4       NO  YES  YES  YES       NO     101-200
## 5      YES  YES  YES  YES      YES       11-25
## 6      YES  YES  YES  YES      YES      26-100

#1.

##我偷懶用其他函數算這題，用老師的方法大概是這樣
#tapply(dta6$math,dta6$paedu,mean)
dta6 %>% group_by(paedu)%>% summarise(mean_math = mean(math))

## # A tibble: 5 × 2
##        paedu mean_math
##       <fctr>     <dbl>
## 1    college  569.1348
## 2 elementary  392.3737
## 3       JunH  488.0429
## 4       SenH  549.3197
## 5 University  601.4606

#2.
##這一段是我在做dummy coding，我用了其他函數做這件事
#
#
##不會的人可以參考老師的，大概會像這樣
#
#
#dummy coding
#D1= (dat6$paedu=="college")
#D2= (dat6$paedu=="elementary")
#D3= (dat6$paedu=="JunH")
#D4= (dat6$paedu=="SenH")
#dta6=data.frame(dta6,D1,D2,D3,D4)
dta6 <- dta6 %>% mutate(D1 = paedu == "college",
                        D2 = paedu == "elementary",
                        D3 = paedu == "JunH",
                        D4 = paedu == "SenH")


#One way ANOVA
summary(r6 <- lm(math ~ D1+D2+D3+D4, data  = dta6))

## 
## Call:
## lm(formula = math ~ D1 + D2 + D3 + D4, data = dta6)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -315.825  -48.200    2.836   55.064  256.915 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  601.461      2.087 288.167  < 2e-16 ***
## D1TRUE       -32.326      3.521  -9.181  < 2e-16 ***
## D2TRUE      -209.087     27.915  -7.490 8.71e-14 ***
## D3TRUE      -113.418      9.156 -12.387  < 2e-16 ***
## D4TRUE       -52.141      3.130 -16.661  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 78.73 on 3415 degrees of freedom
## Multiple R-squared:  0.1103, Adjusted R-squared:  0.1093 
## F-statistic: 105.9 on 4 and 3415 DF,  p-value: < 2.2e-16

#家長教育程度會影響數學成績，截距代表教育程度為大學以上時學生的數學成績，迴歸式的係數代表當教育程度為大學以外的某一項時，會影響數學成績的程度，例如假設教育程度是專科時，數學成績會是601.46-32.33=569.13

Question 7

#1.
dta7 <- read.csv("C:/Users/Cheng_wen_sung/Desktop/10Exam/Happiness.csv",header = T)
head(dta7)#看資料長怎樣

##   happiness age gender friends pets
## 1         5  24   Male      12    3
## 2         5  28   Male       8    1
## 3         6  25 Female       6    0
## 4         4  26   Male       4    2
## 5         3  20 Female       8    0
## 6         5  25   Male       9    0

##happiness對pets做迴歸
summary(r7_1 <- lm(happiness ~ pets, data  = dta7))

## 
## Call:
## lm(formula = happiness ~ pets, data = dta7)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4203 -1.0232 -0.0232  0.9768  4.1826 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.0232     0.2053  19.601  < 2e-16 ***
## pets          0.3971     0.1278   3.107  0.00247 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.496 on 98 degrees of freedom
## Multiple R-squared:  0.08969,    Adjusted R-squared:  0.0804 
## F-statistic: 9.656 on 1 and 98 DF,  p-value: 0.00247

#寵物數量迴歸係數為0.4，顯著



#2.
##多加一個變項
summary(r7_2 <- lm(happiness ~ pets+friends, data  = dta7))

## 
## Call:
## lm(formula = happiness ~ pets + friends, data = dta7)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0884 -1.0050 -0.0642  0.8753  3.6936 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.87045    0.41549   6.909 5.14e-10 ***
## pets         0.35074    0.12322   2.846  0.00540 ** 
## friends      0.17345    0.05508   3.149  0.00218 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.432 on 97 degrees of freedom
## Multiple R-squared:  0.1741, Adjusted R-squared:  0.1571 
## F-statistic: 10.22 on 2 and 97 DF,  p-value: 9.345e-05

#寵物數量迴歸係數為0.35，顯著

#model comparison(多做的)
anova(r7_1,r7_2)

## Analysis of Variance Table
## 
## Model 1: happiness ~ pets
## Model 2: happiness ~ pets + friends
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     98 219.24                                
## 2     97 198.91  1    20.333 9.9156 0.002178 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Question 8

dta8 <- read.table("C:/Users/Cheng_wen_sung/Desktop/10Exam/TIMSSJPE.txt",header = T)

#1.

##迴歸
summary(r8 <- lm(phy~ math+computer+math*computer,data = dta8))

## 
## Call:
## lm(formula = phy ~ math + computer + math * computer, data = dta8)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -255.659  -36.637    0.507   37.622  201.376 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      115.73011   20.72151   5.585 2.52e-08 ***
## math               0.76778    0.03902  19.679  < 2e-16 ***
## computerYES       26.33508   21.98720   1.198    0.231    
## math:computerYES  -0.03208    0.04099  -0.783    0.434    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 57.36 on 3416 degrees of freedom
## Multiple R-squared:  0.5406, Adjusted R-squared:  0.5402 
## F-statistic:  1340 on 3 and 3416 DF,  p-value: < 2.2e-16

#交互作用係數= -0.03，不顯著

#Type III ANOVA

Anova(lm(phy~ math+computer+math*computer,data = dta8, contrasts=list(topic=contr.sum, sys=contr.sum)), type=3)

## Anova Table (Type III tests)
## 
## Response: phy
##                 Sum Sq   Df  F value    Pr(>F)    
## (Intercept)     102627    1  31.1925 2.519e-08 ***
## math           1274107    1 387.2537 < 2.2e-16 ***
## computer          4720    1   1.4346    0.2311    
## math:computer     2015    1   0.6123    0.4340    
## Residuals     11239016 3416                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#2. 從回歸式來看，數學對於物理成績有顯著的影響，但是電腦的有無對於物理成績並沒有顯著影響，而且有電腦也不會讓數學對物理成績的影響更大

#3.

##畫交互作用圖
ggplot(data = dta8, aes(x = math,y = phy,group = computer,color = computer))+
  stat_smooth(method = "lm",se = FALSE)+
  theme_bw()

midterm

WEN_SUNG

2016年11月25日

library

Question 1

1.

Question 3

Question 4

Question 5

Question 6

Question 7

Question 8