Q2 BMI
##Fit an appropriate model to describe the relationship between age
#and bmi in the body mass index data
require(gamlss)
## Loading required package: gamlss
## Warning: package 'gamlss' was built under R version 3.4.3
## Loading required package: splines
## Loading required package: gamlss.data
## Loading required package: gamlss.dist
## Warning: package 'gamlss.dist' was built under R version 3.4.3
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: parallel
## ********** GAMLSS Version 5.0-5 **********
## For more on GAMLSS look at http://www.gamlss.org/
## Type gamlssNews() to see new features/changes/bug fixes.
data("dbbmi", package = "gamlss")
## Warning in data("dbbmi", package = "gamlss"): data set 'dbbmi' not found
str(dbbmi)
## 'data.frame': 7294 obs. of 2 variables:
## $ age: num 0.03 0.04 0.04 0.04 0.04 0.05 0.05 0.05 0.05 0.05 ...
## $ bmi: num 13.2 12.4 14.5 11.8 15.3 ...
head(dbbmi)
## age bmi
## 1 0.03 13.23529
## 2 0.04 12.43877
## 3 0.04 14.54177
## 4 0.04 11.77395
## 5 0.04 15.32561
## 6 0.05 13.21439
require(ggplot2)
## Loading required package: ggplot2
plot(dbbmi, pch=20,cex=.1)

# simple regression model
m0 <- lm(bmi ~ age, data = dbbmi)
# piecewise regression to choose knots
# psi設定要找的區間
require(segmented)
## Loading required package: segmented
## Warning: package 'segmented' was built under R version 3.4.3
segmented(m0, seg.Z = ~ age, psi = c(3, 20), data =dbbmi)
## Call: segmented.lm(obj = m0, seg.Z = ~age, psi = c(3, 20), data = dbbmi)
##
## Meaningful coefficients of the linear terms:
## (Intercept) age U1.age U2.age
## 13.9078 8.9143 -9.3317 0.9494
##
## Estimated Break-Point(s):
## psi1.age psi2.age
## 0.3883 6.2926
# set up a linear spline at 8
a8 <- (dbbmi$age - 8)
a8[a8 < 0] <- 0
# set up a linear spline at 18
a18 <- (dbbmi$age - 18)
a18[a18 < 0] <- 0
# the linear spline basis functions
# fit the linear spline
summary(m1 <- lm(bmi ~ age + a8 + a18, data = dbbmi ))
##
## Call:
## lm(formula = bmi ~ age + a8 + a18, data = dbbmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9781 -1.3752 -0.2268 1.0825 15.1185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.56039 0.05103 324.517 < 2e-16 ***
## age -0.11548 0.01178 -9.803 < 2e-16 ***
## a8 0.68208 0.02121 32.157 < 2e-16 ***
## a18 -0.24916 0.06764 -3.683 0.000232 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.128 on 7290 degrees of freedom
## Multiple R-squared: 0.4646, Adjusted R-squared: 0.4644
## F-statistic: 2109 on 3 and 7290 DF, p-value: < 2.2e-16
# fit the natural splines model at two knots
require(splines)
summary(m2 <- lm(bmi ~ ns(age, knots = c(8, 18)), data = dbbmi))
##
## Call:
## lm(formula = bmi ~ ns(age, knots = c(8, 18)), data = dbbmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.964 -1.384 -0.219 1.076 15.131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.65712 0.05403 308.32 <2e-16 ***
## ns(age, knots = c(8, 18))1 4.30095 0.13069 32.91 <2e-16 ***
## ns(age, knots = c(8, 18))2 4.07593 0.22312 18.27 <2e-16 ***
## ns(age, knots = c(8, 18))3 7.21840 0.14023 51.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.129 on 7290 degrees of freedom
## Multiple R-squared: 0.4638, Adjusted R-squared: 0.4636
## F-statistic: 2102 on 3 and 7290 DF, p-value: < 2.2e-16
summary(m3 <- lm(bmi ~ ns(age, df = 3), data =dbbmi))
##
## Call:
## lm(formula = bmi ~ ns(age, df = 3), data = dbbmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9419 -1.3960 -0.2083 1.0917 15.3131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.69942 0.06135 272.18 <2e-16 ***
## ns(age, df = 3)1 1.64189 0.12810 12.82 <2e-16 ***
## ns(age, df = 3)2 3.75807 0.20586 18.25 <2e-16 ***
## ns(age, df = 3)3 7.61545 0.11104 68.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.139 on 7290 degrees of freedom
## Multiple R-squared: 0.4592, Adjusted R-squared: 0.4589
## F-statistic: 2063 on 3 and 7290 DF, p-value: < 2.2e-16
# see where the knots are
attr(terms(m3), "predvars")
## list(bmi, ns(age, knots = c(3.3, 13.65), Boundary.knots = c(0.03,
## 21.7), intercept = FALSE))
# plot data and mode fits
plot(bmi ~ age, data = dbbmi, col = "gray20", cex = .1,
xlab = "Age", ylab = "bmi")
abline(v = c(8, 18), lty = 3) # lty= line type, 3=dotted
grid()
lines(dbbmi$age, fitted(m1), col="darkred", lty = 2, lwd=1.5 )
lines(dbbmi$age, fitted(m2), col = "steelblue")
lines(dbbmi$age, fitted(m3), col = "skyblue")

# compare model fits
anova(m0, m1, m2, m3)
## Analysis of Variance Table
##
## Model 1: bmi ~ age
## Model 2: bmi ~ age + a8 + a18
## Model 3: bmi ~ ns(age, knots = c(8, 18))
## Model 4: bmi ~ ns(age, df = 3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7292 38522
## 2 7290 33005 2 5516.4 609.22 < 2.2e-16 ***
## 3 7290 33057 0 -51.4
## 4 7290 33342 0 -285.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#m1 is the best
Q3 union membership, wage and gender
#relationship between union membership and wage and gender
#generalized additive model
require(AER)
## Loading required package: AER
## Warning: package 'AER' was built under R version 3.4.3
## Loading required package: car
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
data("CPS1985", package = "AER")
dat<-CPS1985
str(dat)
## 'data.frame': 534 obs. of 11 variables:
## $ wage : num 5.1 4.95 6.67 4 7.5 ...
## $ education : num 8 9 12 12 12 13 10 12 16 12 ...
## $ experience: num 21 42 1 4 17 9 27 9 11 9 ...
## $ age : num 35 57 19 22 35 28 43 27 33 27 ...
## $ ethnicity : Factor w/ 3 levels "cauc","hispanic",..: 2 1 1 1 1 1 1 1 1 1 ...
## $ region : Factor w/ 2 levels "south","other": 2 2 2 2 2 2 1 2 2 2 ...
## $ gender : Factor w/ 2 levels "male","female": 2 2 1 1 1 1 1 1 1 1 ...
## $ occupation: Factor w/ 6 levels "worker","technical",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sector : Factor w/ 3 levels "manufacturing",..: 1 1 1 3 3 3 3 3 1 3 ...
## $ union : Factor w/ 2 levels "no","yes": 1 1 1 1 1 2 1 1 1 1 ...
## $ married : Factor w/ 2 levels "no","yes": 2 2 1 1 2 1 1 1 2 1 ...
require(ggplot2)
#看一下男女有沒有加入公會與薪資的關係
ggplot(dat, aes(x = union, y = wage, color = gender))+
stat_summary(fun.data = mean_se)+
theme_bw()+
labs(x = "union", y = "wage")

summary(m0 <- glm(union ~ gender + wage, data = dat, family = "binomial"))
##
## Call:
## glm(formula = union ~ gender + wage, family = "binomial", data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2310 -0.6685 -0.5170 -0.4363 2.2196
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.80629 0.26013 -6.944 3.81e-12 ***
## genderfemale -0.74873 0.24908 -3.006 0.00265 **
## wage 0.06023 0.02025 2.974 0.00294 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 503.08 on 533 degrees of freedom
## Residual deviance: 481.00 on 531 degrees of freedom
## AIC: 487
##
## Number of Fisher Scoring iterations: 4
require(effects)
## Loading required package: effects
## Warning: package 'effects' was built under R version 3.4.3
## Loading required package: carData
##
## Attaching package: 'carData'
## The following objects are masked from 'package:car':
##
## Guyer, UN, Vocab
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
# effects plot
plot(effect("gender", mod = m0), ylab="Probability", xlab="gender", main="")

plot(effect("wage", mod = m0), ylab="Probability", xlab="wage", main="")

require(mgcv)
## Loading required package: mgcv
## This is mgcv 1.8-22. For overview type 'help("mgcv-package")'.
summary(m1 <- gam(union ~ gender+ s(wage, by=gender), data = dat, family = "binomial"))
##
## Family: binomial
## Link function: logit
##
## Formula:
## union ~ gender + s(wage, by = gender)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4376 0.1752 -8.205 2.3e-16 ***
## genderfemale -0.5666 0.2693 -2.104 0.0354 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(wage):gendermale 3.431 4.262 21.059 0.000395 ***
## s(wage):genderfemale 2.081 2.623 6.731 0.068691 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0849 Deviance explained = 9.73%
## UBRE = -0.12143 Scale est. = 1 n = 534
anova(m0, m1, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: union ~ gender + wage
## Model 2: union ~ gender + s(wage, by = gender)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 531.00 481.00
## 2 526.49 454.13 4.5122 26.868 3.685e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#m1 is better
Q4mortality rates in Sweden
#analysis in mortality rates in Sweden
#Mortality rate (count/exposure) follows a Poisson distribution(λ).
#log(count/exposure) = log(count) - log(exposure).
dat = read.csv("http://140.116.183.121/~sheu/lmm/Data/mortality_sweden.csv",sep=",", h = T)
str(dat)
## 'data.frame': 522 obs. of 4 variables:
## $ Year : int 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 ...
## $ Count : int 20406 15944 15669 18365 16631 19237 21400 26392 21451 21873 ...
## $ Exposure: num 562888 561540 553564 542766 536570 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
require(lattice)
## Loading required package: lattice
require(mgcv)
xyplot(log(Count/Exposure) ~ Year, groups=Gender, data = dat,
col = c("tomato", "steelblue"), type = c("p","g","smooth") )

# augument the exposure term in log
dat$le <- log(dat$Exposure)
# log(E[Yti]) = log(eti) + β0 + β1 genderi + s(yeart)
# s= definition of smooth terms within gam model formulae
summary(m0 <- gam(Count ~ offset(le) + Gender + s(Year), family = poisson, data = dat))
##
## Family: poisson
## Link function: log
##
## Formula:
## Count ~ offset(le) + Gender + s(Year)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.0097738 0.0003343 -11995.9 <2e-16 ***
## GenderM 0.3013409 0.0004515 667.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Year) 8.991 9 5622270 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.755 Deviance explained = 94.1%
## UBRE = 750.16 Scale est. = 1 n = 522
#log(E[Yti]) = log(eti) + β0 + β1 genderi + si(yeart)
# different smoothing curves by gender
summary(m1 <- gam(Count ~ offset(le) + Gender + s(Year, by = Gender), family = poisson, data = dat))
##
## Family: poisson
## Link function: log
##
## Formula:
## Count ~ offset(le) + Gender + s(Year, by = Gender)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.0064015 0.0003363 -11913.6 <2e-16 ***
## GenderM 0.2846730 0.0004641 613.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Year):GenderF 8.992 9 3091133 <2e-16 ***
## s(Year):GenderM 8.996 9 2559070 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.855 Deviance explained = 96.2%
## UBRE = 479.12 Scale est. = 1 n = 522
# overdispersion Poisson
summary(m2 <- gam(Count ~ offset(le) + Gender + s(Year, by = Gender),scale = -1,
family = poisson, data = dat))
##
## Family: poisson
## Link function: log
##
## Formula:
## Count ~ offset(le) + Gender + s(Year, by = Gender)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.006341 0.007822 -512.20 <2e-16 ***
## GenderM 0.284659 0.010796 26.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Year):GenderF 8.678 8.970 636.6 <2e-16 ***
## s(Year):GenderM 8.807 8.989 525.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.855 Deviance explained = 96.2%
## GCV = 518.54 Scale est. = 541.13 n = 522
anova(m0,m1,m2,test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Count ~ offset(le) + Gender + s(Year)
## Model 2: Count ~ offset(le) + Gender + s(Year, by = Gender)
## Model 3: Count ~ offset(le) + Gender + s(Year, by = Gender)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 511.00 392082
## 2 502.00 250582 9.000001 141501 < 2.2e-16 ***
## 3 502.04 250845 -0.041649 -263 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#m1 is the best
#fitted values
dat$yhat <- fitted(m1)
require(ggplot2)
# m1預測值(線)與觀察值
ggplot(dat, aes(Year, Count/Exposure, color = Gender)) +
geom_point(pch = 1) +
geom_line(aes(Year, yhat/Exposure)) +
guides(colour = guide_legend(reverse = T))+ #order of legends is reversed
theme_bw() +
theme()

Q5 Prestige Data
require(car)
data(Prestige)
dat<-Prestige
str(dat)
## 'data.frame': 102 obs. of 6 variables:
## $ education: num 13.1 12.3 12.8 11.4 14.6 ...
## $ income : int 12351 25879 9271 8865 8403 11030 8258 14163 11377 11023 ...
## $ women : num 11.16 4.02 15.7 9.11 11.68 ...
## $ prestige : num 68.8 69.1 63.4 56.8 73.5 77.6 72.6 78.1 73.1 68.8 ...
## $ census : int 1113 1130 1171 1175 2111 2113 2133 2141 2143 2153 ...
## $ type : Factor w/ 3 levels "bc","prof","wc": 2 2 2 2 2 2 2 2 2 2 ...
#依老師的圖示的label
dat$es[dat$education >=6.37 & dat$education <= 9.58]<- 1
dat$es[dat$education >9.58 & dat$education <= 12.8]<- 2
dat$es[dat$education >12.8 & dat$education <= 16]<- 3
dat$es<-as.factor(dat$es)
dat$es<-factor(dat$es, levels=c(1,2,3), labels=c("6.37-9.58", "9.58-12.8", "12.8-16"))
#
require(ggplot2)
ggplot(dat, aes(income, prestige))+
geom_point(pch=1, colour="blue")+
stat_smooth(se = F)+
facet_wrap(~es)+
theme_bw()
## `geom_smooth()` using method = 'loess'

#以收入的四分位數作為觀察類別
quantile(dat$income)
## 0% 25% 50% 75% 100%
## 611.00 4106.00 5930.50 8187.25 25879.00
dat$ei[dat$income <=4106]<- 1
dat$ei[dat$income >4106 & dat$income <= 5930.5]<- 2
dat$ei[dat$income >5930.5 & dat$income <= 8187.25]<- 3
dat$ei[dat$income >8187.25 & dat$income <= 25879]<- 4
dat$ei<-factor(dat$ei, levels=c(1,2,3,4))
#
ggplot(dat, aes(education, prestige))+
geom_point(pch=1, colour="blue")+
stat_smooth(se = F)+
facet_wrap(~ei,ncol = 4)+
theme_bw()
## `geom_smooth()` using method = 'loess'

summary(m0 <- gam(prestige ~ s(income) + s(education), data = dat, family = gaussian))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## prestige ~ s(income) + s(education)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.8333 0.6889 67.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(income) 3.118 3.877 14.61 1.53e-09 ***
## s(education) 3.177 3.952 38.78 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.836 Deviance explained = 84.7%
## GCV = 52.143 Scale est. = 48.414 n = 102
plot(m0, pages = 1, shift = m0$coef[1])

vis.gam(m0, theta = 29, phi = 13, ticktype = "detailed",
se = 2)

Q6 kyphosis data
#kyphosis{rpart} data set
require(rpart)
## Loading required package: rpart
data(kyphosis)
dat<-kyphosis
str(dat)
## 'data.frame': 81 obs. of 4 variables:
## $ Kyphosis: Factor w/ 2 levels "absent","present": 1 1 2 1 1 1 1 1 1 2 ...
## $ Age : int 71 158 128 2 1 1 61 37 113 59 ...
## $ Number : int 3 3 4 5 4 2 2 3 2 6 ...
## $ Start : int 5 14 5 1 15 16 17 16 16 12 ...
summary(m0 <- glm(Kyphosis ~ Age + Number + Start, data = dat, family = "binomial"))
##
## Call:
## glm(formula = Kyphosis ~ Age + Number + Start, family = "binomial",
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3124 -0.5484 -0.3632 -0.1659 2.1613
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.036934 1.449575 -1.405 0.15996
## Age 0.010930 0.006446 1.696 0.08996 .
## Number 0.410601 0.224861 1.826 0.06785 .
## Start -0.206510 0.067699 -3.050 0.00229 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.234 on 80 degrees of freedom
## Residual deviance: 61.380 on 77 degrees of freedom
## AIC: 69.38
##
## Number of Fisher Scoring iterations: 5
# effects plot
plot(effect("Age", mod = m0), ylab="Probability", xlab="Age", main="")

plot(effect("Number", mod = m0), ylab="Probability", xlab="Number", main="")

plot(effect("Start", mod = m0), ylab="Probability", xlab="Start", main="")

summary(m1 <- gam(Kyphosis ~ s(Age) + s(Start)+ Number , data = dat, family = "binomial"))
##
## Family: binomial
## Link function: logit
##
## Formula:
## Kyphosis ~ s(Age) + s(Start) + Number
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5926 1.1464 -3.134 0.00172 **
## Number 0.3333 0.2324 1.434 0.15148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Age) 2.209 2.790 6.305 0.0795 .
## s(Start) 2.020 2.523 9.645 0.0146 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.355 Deviance explained = 39.4%
## UBRE = -0.22384 Scale est. = 1 n = 81
summary(m2 <- gam(Kyphosis ~ s(Age) + s(Start) , data = dat, family = "binomial"))
##
## Family: binomial
## Link function: logit
##
## Formula:
## Kyphosis ~ s(Age) + s(Start)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1723 0.4846 -4.483 7.37e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Age) 2.183 2.761 6.346 0.08054 .
## s(Start) 2.199 2.740 12.798 0.00421 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.33 Deviance explained = 37.1%
## UBRE = -0.2204 Scale est. = 1 n = 81
anova(m1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## Kyphosis ~ s(Age) + s(Start) + Number
##
## Parametric Terms:
## df Chi.sq p-value
## Number 1 2.057 0.151
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Age) 2.209 2.790 6.305 0.0795
## s(Start) 2.020 2.523 9.645 0.0146
#其實age不用配曲線(?)
anova(m0,m1, m2)
## Analysis of Deviance Table
##
## Model 1: Kyphosis ~ Age + Number + Start
## Model 2: Kyphosis ~ s(Age) + s(Start) + Number
## Model 3: Kyphosis ~ s(Age) + s(Start)
## Resid. Df Resid. Dev Df Deviance
## 1 77.000 61.380
## 2 74.770 50.410 2.22951 10.9701
## 3 75.618 52.383 -0.84749 -1.9735
AIC(m0,m1, m2)
## df AIC
## m0 4.000000 69.37993
## m1 6.229507 62.86889
## m2 5.382020 63.14741
#m1 is better
vis.gam(m1, theta = 35, phi = 15, tick="detailed" )

vis.gam(m1, theta = -35, phi = 15, tick="detailed" )

Q7 ERP data
##Include subjects as random effects in the analysis of the ERP data example
require(erp.easy)
## Loading required package: erp.easy
## Warning: package 'erp.easy' was built under R version 3.4.3
data(ERPdata)
dat<-ERPdata
?ERPdata
## starting httpd help server ...
## done
# extract only V6 data
dat <- ERPdata[, 1:4]
str(dat)
## 'data.frame': 13530 obs. of 4 variables:
## $ Subject : Factor w/ 15 levels "S1","S10","S11",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Stimulus: Factor w/ 2 levels "Negative","Neutral": 1 1 1 1 1 1 1 1 1 1 ...
## $ Time : int -100 -96 -92 -88 -84 -80 -76 -72 -68 -64 ...
## $ V6 : num -2.025 -2.928 -0.975 -2.228 -3.478 ...
ggplot(dat, aes(Time, V6, group = Subject)) +
stat_smooth(se = F, lwd = .1, alpha = .2) +
facet_wrap( ~ Stimulus) +
labs(x = "Time", y = "Amplitude") +
theme_bw()
## `geom_smooth()` using method = 'loess'

# m1
summary(m1 <- gam(V6 ~ Stimulus+ s(Time, by = Stimulus), data = dat))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## V6 ~ Stimulus + s(Time, by = Stimulus)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.84985 0.07672 -37.15 <2e-16 ***
## StimulusNeutral 1.84999 0.10850 17.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time):StimulusNegative 7.922 8.696 618.6 <2e-16 ***
## s(Time):StimulusNeutral 8.586 8.951 540.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.437 Deviance explained = 43.8%
## GCV = 39.876 Scale est. = 39.821 n = 13530
plot(m1, pages = 1, shift = m1$coef[1])

vis.gam(m1, theta = 29, phi = 13, ticktype = "detailed", se = 2)

# m2 - Include subjects as random effects
summary(m2 <- gam(V6 ~ Stimulus+ s(Time, by = Stimulus)+ s(Subject, bs = "re"), data = dat))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## V6 ~ Stimulus + s(Time, by = Stimulus) + s(Subject, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.84985 0.70691 -4.031 5.58e-05 ***
## StimulusNeutral 1.84999 0.09858 18.767 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time):StimulusNegative 8.041 8.755 744.7 <2e-16 ***
## s(Time):StimulusNeutral 8.651 8.965 654.4 <2e-16 ***
## s(Subject) 13.932 14.000 204.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.536 Deviance explained = 53.7%
## GCV = 32.948 Scale est. = 32.869 n = 13530
plot(m2, pages = 1, shift = m2$coef[1])

vis.gam(m2, theta = 29, phi = 13, ticktype = "detailed", se = 2)

Q8 skin conductance data
##gneralized additive mixed model
#skin conductance data
pacman::p_load(tidyverse, mgcv, gamm4, ElemStatLearn, itsadug)
dat<-read.csv("http://140.116.183.121/~sheu/lmm/Data/skinPotentials.csv", sep=",",header = T)
str(dat)
## 'data.frame': 143 obs. of 3 variables:
## $ ID : Factor w/ 13 levels "S101","S102",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Time : int 0 50 100 150 200 250 300 350 400 450 ...
## $ Amplitude: num 9.5 10 11 11 11 10.5 10 9.5 9.5 9.5 ...
ggplot(dat, aes(Time, Amplitude))+
geom_point()+
facet_wrap(~ID)+
theme_bw()

ggplot(dat, aes(Time, Amplitude))+
geom_point(color = "gray")+
geom_smooth(color = "gray")+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.data = mean_se, geom = "errorbar",width = 0)+
theme_bw()
## `geom_smooth()` using method = 'loess'

# Analysis of repeated interaction experiments (ID) using GAMs
## Fit the model with a non-linear effect over time:
summary(m1 <- bam(Amplitude ~ s(Time) + # fixed effects
s(Time, ID, bs = 'fs', m = 1), # random effects
# fs= smooth for each one in a study, and each smooth should have the same smoothing parameter
data = dat))
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has
## repeated 1-d smooths of same variable.
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Amplitude ~ s(Time) + s(Time, ID, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9715 0.4795 18.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time) 7.314 7.924 23.79 <2e-16 ***
## s(Time,ID) 80.472 116.000 20.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.962 Deviance explained = 98.6%
## fREML = 217.74 Scale est. = 0.22943 n = 143
#
par(mfrow=c(1,2))
## Plot effect of t(ime)
plot_smooth(m1, 'Time', rm.ranef = T, main = 'Effect of time')
## Summary:
## * Time : numeric predictor; with 30 values ranging from 0.000000 to 500.000000.
## * ID : factor; set to the value(s): S101. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(Time,ID)
##
## Plot individual variation:
plot(m1, select = 2)

anova(m1)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Amplitude ~ s(Time) + s(Time, ID, bs = "fs", m = 1)
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time) 7.314 7.924 23.79 <2e-16
## s(Time,ID) 80.472 116.000 20.86 <2e-16
#試試看
## Define starting point for each time series:
dat2 <- start_event(dat, column = 'Time', event = 'ID')
# The amount of autocorrelation is set to the height of the second bar:
rhoval <- acf_resid(m1)[2]
## And we refit the model:
summary(m2 <- bam(Amplitude ~ s(Time) + # fixed effects
s(Time, ID, bs = 'fs', m = 1), # random effects
data = dat2,
rho = rhoval, AR.start = start.event # autocorrelation parameters
))
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has
## repeated 1-d smooths of same variable.
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Amplitude ~ s(Time) + s(Time, ID, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.967 0.484 18.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time) 7.39 7.819 22.89 <2e-16 ***
## s(Time,ID) 95.69 116.000 116.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.967 Deviance explained = 99.1%
## fREML = 218.07 Scale est. = 0.10873 n = 143
#
acf_resid(m2)

#其實m1m2在此case中差不多?