library(KMsurv)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(bfeed)
duration 哺乳的時間
delta 使否完成哺乳 (yes=1 , no=0)
race 種族(white=1,black=2, other=3)
poverty 是否貧窮(yes=1, no=0)
smoke 是否吸菸(yes=1, no=0)
alcohol 是否在小孩出生時喝酒(yes=1, no=0)
agemth 媽媽生產年齡
ybirth 出生年份
yschool 媽媽教育程度
pc3mth 第三個月後的產前護理(yes=1, no=0)
par(family="HanziPenTC-W3")
attach(bfeed)
model<-glm(formula =alcohol~yschool,family = binomial(link = "logit"),na.action =
na.exclude)
summary(model)$coefficients %>% round(digits = 4)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.3106 0.7598 -4.3571 0.0000
## yschool 0.0760 0.0603 1.2604 0.2075
predict.glm(model, type="response", newdata=data.frame(yschool=seq(0,10,1))) %>%
plot(ylim=c(0,1),main="教育程度與喝酒與否無相關",xlab="學習時間(年)",ylab="喝酒的機率")
par(family="HanziPenTC-W3")
model<-glm(formula =poverty~yschool,family = binomial(link = "logit"),na.action =
na.exclude)
summary(model)$coefficients %>% round(digits = 4)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4875 0.6183 5.6401 0
## yschool -0.4221 0.0533 -7.9116 0
predict.glm(model, type="response", newdata=data.frame(yschool=seq(0,10,1))) %>%
plot(ylim=c(0,1),main="教育程度與貧窮有相關",xlab="學習時間(年)",ylab="貧窮的機率")
smokee<-smoke %>% factor()
racee<-race %>% factor()
contrasts(racee)<-contr.treatment(3, base=3)
model<-glm(formula =poverty~smokee+racee,family = binomial(link = "logit"),na.action =
na.exclude)
summary(model)$coefficients %>% round(digits = 4)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2350 0.1950 -6.3346 0.0000
## smokee1 0.7518 0.1900 3.9562 0.0001
## racee1 -0.8162 0.2307 -3.5380 0.0004
## racee2 0.2965 0.2795 1.0608 0.2888
將估計值取exp()
發現吸煙者貧窮的機會是不吸菸者的兩倍多 若以其他人種當作基準,白人貧窮的機會是其他的0.44倍 黑人貧窮的機會是其他的1.34倍,但不顯著
a<-summary(model)$coefficients %>% exp()
a[,1]
## (Intercept) smokee1 racee1 racee2
## 0.2908258 2.1208652 0.4421109 1.3450998
table(race,poverty) %>% prop.table(.,1)
## poverty
## race 0 1
## 1 0.8504532 0.1495468
## 2 0.6923077 0.3076923
## 3 0.7567568 0.2432432
table(smoke,poverty) %>% prop.table(.,1)
## poverty
## smoke 0 1
## 0 0.8386606 0.1613394
## 1 0.7592593 0.2407407
以上兩個表格分別是:
(i) 給定種族下,貧窮者比例
(ii) 給定是否吸菸下,貧窮者比例
或許是因為把基準點設在race=3 ,所以相較之下黑人之間的平復差距不大,因此不顯著
而在吸菸方面,(84:16) v.s. (76:24)兩者的差距是比較明顯的
smokee<-smoke %>% factor()
racee<-race %>% factor()
contrasts(racee)<-contr.treatment(3, base=2)
model<-glm(formula =poverty~smokee+racee,family = binomial(link = "logit"),na.action =
na.exclude)
summary(model)$coefficients %>% round(digits = 4)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.9386 0.2051 -4.5762 0.0000
## smokee1 0.7518 0.1900 3.9562 0.0001
## racee1 -1.1127 0.2369 -4.6964 0.0000
## racee3 -0.2965 0.2795 -1.0608 0.2888
再把上一個檢定中,種族的基準改成黑人後,其他(非黑非白)的部分依然不顯著
由此可知,其他(非黑非白)與黑人之間的差別並不會對貧窮有太多影響