#gender -> math 顯著
summary(r1_1 <- lm(math ~ gender,data = dta1))
##
## Call:
## lm(formula = math ~ gender, data = dta1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -442.33 -53.77 3.79 59.77 247.52
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 578.534 2.070 279.496 < 2e-16 ***
## gendergirl -9.153 2.853 -3.208 0.00135 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 83.31 on 3418 degrees of freedom
## Multiple R-squared: 0.003002, Adjusted R-squared: 0.00271
## F-statistic: 10.29 on 1 and 3418 DF, p-value: 0.00135
#math -> phy 顯著
summary(r1_2 <- lm(phy ~ math,data = dta1))
##
## Call:
## lm(formula = phy ~ math, data = dta1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -253.479 -36.717 0.581 37.567 201.178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 135.94790 6.82300 19.93 <2e-16 ***
## math 0.74482 0.01177 63.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.41 on 3418 degrees of freedom
## Multiple R-squared: 0.5396, Adjusted R-squared: 0.5394
## F-statistic: 4005 on 1 and 3418 DF, p-value: < 2.2e-16
#gender -> phy 顯著
summary(r1_3 <- lm(phy ~ gender,data = dta1))
##
## Call:
## lm(formula = phy ~ gender, data = dta1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -417.62 -51.64 4.58 57.06 294.82
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 569.474 2.097 271.575 < 2e-16 ***
## gendergirl -11.794 2.890 -4.081 4.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 84.4 on 3418 degrees of freedom
## Multiple R-squared: 0.004848, Adjusted R-squared: 0.004557
## F-statistic: 16.65 on 1 and 3418 DF, p-value: 4.596e-05
#gender -> phy 兩者皆顯著(部分中介)
summary(r1_4 <- lm(phy ~ gender+math,data = dta1))
##
## Call:
## lm(formula = phy ~ gender + math, data = dta1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -256.339 -36.665 1.044 37.076 203.737
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 139.51472 6.96099 20.042 <2e-16 ***
## gendergirl -4.99229 1.96748 -2.537 0.0112 *
## math 0.74319 0.01178 63.104 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.36 on 3417 degrees of freedom
## Multiple R-squared: 0.5404, Adjusted R-squared: 0.5402
## F-statistic: 2009 on 2 and 3417 DF, p-value: < 2.2e-16
a <- summary(r1_1)$coefficients['gendergirl','Estimate']
ase <- summary(r1_1)$coefficients['gendergirl','Std. Error']
b <- summary(r1_4)$coefficients['math','Estimate']
bse <- summary(r1_4)$coefficients['math','Std. Error']
ab <- a*b
abse <- sqrt(ase*ase*b*b+bse*bse*a*a)
zab <- ab/abse
pab <- (1-pnorm(abs(zab)))*2
zab##z值
## [1] -3.203734
pab
## [1] 0.001356575
cresult1 <- matrix(NA,5000,1)
n1 <- dim(dta1)[1]
for (i in 1:5000){
#抽樣,製造新的樣本。
newdat <- dta1[sample(1:n1,n1,replace=T),]
#在新樣本上跑迴歸,擷取迴歸係數計算中介效果
r1_5<-lm(math ~ gender,data=newdat)
r1_6<-lm(phy ~ gender+math,data=newdat)
a<-summary(r1_5)$coefficients['gendergirl','Estimate']
b<-summary(r1_6)$coefficients['math','Estimate']
ab<-a*b
cresult1[i]<-ab
}
quantile(cresult1,c(.025,.975))##信賴區間
## 2.5% 97.5%
## -10.909362 -2.677779
cresult2 <- matrix(NA,5000,1)
n2 <- dim(dta2)[1]
for (i in 1:5000){
#抽樣,製造新的樣本。
newdat2 <- dta2[sample(1:n2,n2,replace=T),]
#算變異係數
cresult2[i] <- sd(newdat2)/mean(newdat2)
}
quantile(cresult2,c(.025,.975))##95%信賴區間
## 2.5% 97.5%
## 0.1203109 0.1551773
dta3%>%group_by(paedu)%>%summarize(rate = sum(computer == "YES")/(sum(computer == "YES")+sum(computer == "NO")))
## # A tibble: 5 × 2
## paedu rate
## <fctr> <dbl>
## 1 college 0.9156939
## 2 elementary 0.7500000
## 3 JunH 0.7051282
## 4 SenH 0.8491228
## 5 University 0.9613493
dta3 <- dta3%>%mutate(D1 = paedu == "JunH",
D2 = paedu == "SenH",
D3 = paedu == "University",
D4 = paedu == "college")
dta3 <- dta3%>%mutate(D1 = as.factor(D1),
D2 = as.factor(D2),
D3 = as.factor(D3),
D4 = as.factor(D4))
summary(r3 <- glm(computer ~ D1 + D2 + D3 + D4, data = dta3 ,family = "binomial"))
##
## Call:
## glm(formula = computer ~ D1 + D2 + D3 + D4, family = "binomial",
## data = dta3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5508 0.2808 0.4197 0.5719 0.8359
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0986 0.8165 1.346 0.1785
## D1TRUE -0.2268 0.8534 -0.266 0.7905
## D2TRUE 0.6291 0.8207 0.767 0.4433
## D3TRUE 2.1152 0.8280 2.555 0.0106 *
## D4TRUE 1.2866 0.8267 1.556 0.1196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2111.6 on 3419 degrees of freedom
## Residual deviance: 1982.4 on 3415 degrees of freedom
## AIC: 1992.4
##
## Number of Fisher Scoring iterations: 6
#1.
24/18
## [1] 1.333333
#2.
ppois(0,(24/18))
## [1] 0.2635971
#3.
qpois(0.8,(24/18))
## [1] 2
#1.
exp(3.54934)
## [1] 34.79035
#2.
exp(3.54934-(5*0.17915))
## [1] 14.20494
#3.
5*2.5
## [1] 12.5
#4.
(3.54934-log(12.5))/0.17915
## [1] 5.713711
dta6 <- read.table("C:/Users/Cheng_wen_sung/Desktop/16Exam/Q6.txt",header = T)
str(dta6)
## 'data.frame': 500 obs. of 3 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ hw : int 20 25 17 15 12 21 13 14 17 18 ...
## $ qno: int 8 8 4 3 3 3 5 0 3 5 ...
# set up linear spline
dta6 <- dta6 %>%
mutate( hw_intox = factor(ifelse(hw >= 25, "Yes", "No")),
hw_above = ifelse(hw < 25, 0, hw - 25))
#ggplot with three fitted line
ggplot(data = dta6,aes(x = hw , y = qno))+
geom_point(alpha = 0.5)+
stat_smooth(method = "lm",se = F,linetype = "dashed")+
geom_vline(xintercept = 25, linetype = "dotted") +
stat_smooth(aes(group = hw_intox), method = "lm", se = F) +
theme_bw()
#simple linear model
summary(r6_1 <- lm(qno ~ hw , data = dta6))
##
## Call:
## lm(formula = qno ~ hw, data = dta6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.843 -1.460 -0.311 1.226 34.795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.43329 0.37038 -6.57 1.27e-10 ***
## hw 0.38298 0.02374 16.13 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.54 on 498 degrees of freedom
## Multiple R-squared: 0.3432, Adjusted R-squared: 0.3419
## F-statistic: 260.3 on 1 and 498 DF, p-value: < 2.2e-16
#piecewise model
summary(r6_2 <- segmented(r6_1, seg.Z = ~ hw, psi = 25))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = r6_1, seg.Z = ~hw, psi = 25)
##
## Estimated Break-Point(s):
## Est. St.Err
## 29.616 0.192
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.35864 0.29346 -4.63 4.68e-06 ***
## hw 0.30357 0.01898 15.99 < 2e-16 ***
## U1.hw 10.14687 0.90603 11.20 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.957 on 496 degrees of freedom
## Multiple R-Squared: 0.6118, Adjusted R-squared: 0.6095
##
## Convergence attained in 2 iterations with relative change 1.197219e-16
# plot model fit
plot(r6_2)
with(dta6, points(hw, qno))
abline(v = r6_2$psi[2], lty = 3)
grid()
dta7 <- read.table("C:/Users/Cheng_wen_sung/Desktop/16Exam/Q7.txt")
str(dta7)
## 'data.frame': 1976 obs. of 7 variables:
## $ V1: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V2: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V3: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V4: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V5: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V6: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V7: num 1 1.2 1.2 1.2 1.2 1.4 1.4 1.4 1.4 1.4 ...
colnames(dta7) <- c("insomnia","gender","twelfth","coffee","conflict","supervise","alienation")
dta7 <- dta7%>%mutate(insomnia = as.factor(insomnia),
gender = as.factor(gender),
twelfth = as.factor(twelfth),
coffee = as.factor(coffee))
summary(r7 <- glm(insomnia ~ conflict + conflict:twelfth + twelfth+
alienation + alienation:conflict +
alienation:twelfth + alienation:conflict:twelfth,
family = "binomial",data = dta7))
##
## Call:
## glm(formula = insomnia ~ conflict + conflict:twelfth + twelfth +
## alienation + alienation:conflict + alienation:twelfth + alienation:conflict:twelfth,
## family = "binomial", data = dta7)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0319 -0.5851 -0.4851 -0.3595 2.6817
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5679 1.3671 -4.804 1.55e-06 ***
## conflict 1.4379 0.6358 2.261 0.0237 *
## twelfth1 4.2609 2.5394 1.678 0.0934 .
## alienation 1.6882 0.6753 2.500 0.0124 *
## conflict:twelfth1 -1.1690 1.2215 -0.957 0.3386
## conflict:alienation -0.3864 0.3045 -1.269 0.2046
## twelfth1:alienation -1.7840 1.3251 -1.346 0.1782
## conflict:twelfth1:alienation 0.4736 0.6149 0.770 0.4412
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1512.2 on 1975 degrees of freedom
## Residual deviance: 1449.5 on 1968 degrees of freedom
## AIC: 1465.5
##
## Number of Fisher Scoring iterations: 5
exp(cbind(coef=coef(r7),confint(r7)))
## coef 2.5 % 97.5 %
## (Intercept) 0.001404685 0.0000916246 1.985185e-02
## conflict 4.211637446 1.2195944803 1.495185e+01
## twelfth1 70.871216173 0.4514788840 9.764706e+03
## alienation 5.409996386 1.4428463254 2.066859e+01
## conflict:twelfth1 0.310677789 0.0292491076 3.585649e+00
## conflict:alienation 0.679522819 0.3696799032 1.232210e+00
## twelfth1:alienation 0.167966851 0.0125365900 2.304951e+00
## conflict:twelfth1:alienation 1.605751713 0.4671521520 5.288049e+00