library(lattice)
xyplot(sbp ~ dbp | sex,
data=dta, cex=.5,
type=c("p","g","r"),
xlab="Diastolic pressure (mmHg)",
ylab="Systolic pressure (mmHg)")由lattice plot 可知,Female的slope略高於Male,在相同的dbp下,女生的sbp略高於男生
## Analysis of Variance Table
##
## Model 1: sbp ~ dbp
## Model 2: sbp ~ dbp + sex
## Model 3: sbp ~ dbp + sex + sex:dbp
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 4697 941778
## 2 4696 927853 1 13925 71.800 < 2.2e-16 ***
## 3 4695 910543 1 17310 89.256 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
我們可以看到m1及m2的model是顯著的,即代表性別對血壓有影響,且DBP與性別有交互作用。
plot(rstandard(m2) ~ fitted(m2),
cex=.5,
xlab="Fitted values",
ylab="Standardized residuals")
grid() ## The end
ggplot(dta, aes(Loudness, Length, group = Subject, alpha = Subject))+
geom_point() +
geom_line() +
stat_smooth(aes(group = 1), method = "lm", size = rel(.8)) +
coord_cartesian(ylim = c(-1, 5)) +
labs(x = "Loudness (db)", y = "Average log(Length)") +
theme(legend.position = "NONE")## Warning: Using alpha for a discrete variable is not advised.
## `geom_smooth()` using formula 'y ~ x'
## one model fits all
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -3.20 0.344 -9.31 2.53e-12
## 2 Loudness 0.0790 0.00482 16.4 2.69e-21
ggplot(dta, aes(Loudness, Length, group = Subject, color = Subject))+
geom_point() +
stat_smooth(method = "lm", se = F, size = rel(.5)) +
scale_color_grey()+
coord_cartesian(ylim = c(-1, 5)) +
labs(x = "Loudness (db)", y = "Average log(Length)") +
theme(legend.position = "NONE")## `geom_smooth()` using formula 'y ~ x'
##grouped by subject before doing regression
dta %>%
group_by(Subject) %>%
do(broom::tidy(lm(Length ~ Loudness, data = .)))## # A tibble: 20 x 6
## # Groups: Subject [10]
## Subject term estimate std.error statistic p.value
## <fct> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 A (Intercept) -4.49 0.748 -6.01 0.00923
## 2 A Loudness 0.103 0.0105 9.80 0.00225
## 3 B (Intercept) -4.59 0.462 -9.92 0.00218
## 4 B Loudness 0.0935 0.00647 14.5 0.000718
## 5 C (Intercept) -2.29 0.253 -9.05 0.00285
## 6 C Loudness 0.0695 0.00355 19.6 0.000291
## 7 D (Intercept) -2.92 0.338 -8.63 0.00327
## 8 D Loudness 0.0751 0.00474 15.8 0.000547
## 9 E (Intercept) -5.39 0.743 -7.24 0.00542
## 10 E Loudness 0.104 0.0104 9.98 0.00214
## 11 F (Intercept) -2.42 0.523 -4.63 0.0190
## 12 F Loudness 0.0774 0.00732 10.6 0.00181
## 13 G (Intercept) -2.81 0.312 -9.01 0.00288
## 14 G Loudness 0.0650 0.00437 14.9 0.000659
## 15 H (Intercept) -3.42 0.401 -8.53 0.00338
## 16 H Loudness 0.0822 0.00561 14.6 0.000691
## 17 I (Intercept) -1.95 0.512 -3.80 0.0319
## 18 I Loudness 0.0656 0.00718 9.14 0.00277
## 19 J (Intercept) -1.76 0.265 -6.63 0.00699
## 20 J Loudness 0.0552 0.00372 14.9 0.000660
##
## Call:
## lm(formula = Length ~ Subject/Loudness - 1, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3318 -0.1073 0.0023 0.1485 0.3242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## SubjectA -4.49370 0.48664 -9.23 2.8e-10
## SubjectB -4.58530 0.48664 -9.42 1.8e-10
## SubjectC -2.29360 0.48664 -4.71 5.2e-05
## SubjectD -2.91940 0.48664 -6.00 1.4e-06
## SubjectE -5.38510 0.48664 -11.07 4.1e-12
## SubjectF -2.42120 0.48664 -4.98 2.5e-05
## SubjectG -2.81090 0.48664 -5.78 2.6e-06
## SubjectH -3.42070 0.48664 -7.03 8.2e-08
## SubjectI -1.94870 0.48664 -4.00 0.00038
## SubjectJ -1.75910 0.48664 -3.61 0.00109
## SubjectA:Loudness 0.10265 0.00681 15.06 1.6e-15
## SubjectB:Loudness 0.09355 0.00681 13.73 1.8e-14
## SubjectC:Loudness 0.06950 0.00681 10.20 2.9e-11
## SubjectD:Loudness 0.07506 0.00681 11.02 4.6e-12
## SubjectE:Loudness 0.10391 0.00681 15.25 1.1e-15
## SubjectF:Loudness 0.07740 0.00681 11.36 2.2e-12
## SubjectG:Loudness 0.06497 0.00681 9.53 1.4e-10
## SubjectH:Loudness 0.08223 0.00681 12.07 4.9e-13
## SubjectI:Loudness 0.06561 0.00681 9.63 1.1e-10
## SubjectJ:Loudness 0.05525 0.00681 8.11 4.7e-09
##
## Residual standard error: 0.215 on 30 degrees of freedom
## Multiple R-squared: 0.996, Adjusted R-squared: 0.993
## F-statistic: 369 on 20 and 30 DF, p-value: <2e-16
## Subject Loudness Length sd n fit1
## 1 A 50 0.379 0.507 3 0.6388
## 2 A 60 1.727 0.904 3 1.6653
## 3 A 70 3.016 0.553 3 2.6918
## 4 A 80 3.924 0.363 3 3.7183
## 5 A 90 4.413 0.092 3 4.7448
## 6 B 50 0.260 0.077 3 0.0922
## 7 B 60 0.833 0.287 3 1.0277
## 8 B 70 2.009 0.396 3 1.9632
## 9 B 80 2.720 0.124 3 2.8987
## 10 B 90 3.994 0.068 3 3.8342
## 11 C 50 1.072 0.106 3 1.1814
## 12 C 60 1.942 0.324 3 1.8764
## 13 C 70 2.700 0.319 3 2.5714
## 14 C 80 3.250 0.137 3 3.2664
## 15 C 90 3.893 0.356 3 3.9614
## 16 D 50 0.697 0.288 3 0.8336
## 17 D 60 1.787 0.121 3 1.5842
## 18 D 70 2.283 0.139 3 2.3348
## 19 D 80 3.127 0.331 3 3.0854
## 20 D 90 3.780 0.314 3 3.8360
## 21 E 50 -0.520 0.168 3 -0.1896
## 22 E 60 1.124 0.550 3 0.8495
## 23 E 70 2.045 0.315 3 1.8886
## 24 E 80 3.113 0.247 3 2.9277
## 25 E 90 3.681 0.217 3 3.9668
## 26 F 50 1.348 0.842 3 1.4488
## 27 F 60 2.134 0.130 3 2.2228
## 28 F 70 3.249 0.626 3 2.9968
## 29 F 80 3.936 0.181 3 3.7708
## 30 F 90 4.317 0.107 3 4.5448
## 31 G 50 0.485 0.272 3 0.4376
## 32 G 60 1.158 0.413 3 1.0873
## 33 G 70 1.585 0.240 3 1.7370
## 34 G 80 2.289 0.102 3 2.3867
## 35 G 90 3.168 0.118 3 3.0364
## 36 H 50 0.682 0.189 3 0.6908
## 37 H 60 1.684 0.231 3 1.5131
## 38 H 70 2.090 0.540 3 2.3354
## 39 H 80 3.171 0.449 3 3.1577
## 40 H 90 4.050 0.309 3 3.9800
## 41 I 50 1.201 0.089 3 1.3318
## 42 I 60 2.142 0.514 3 1.9879
## 43 I 70 2.543 0.821 3 2.6440
## 44 I 80 3.563 0.283 3 3.3001
## 45 I 90 3.771 0.378 3 3.9562
## 46 J 50 1.111 0.174 3 1.0034
## 47 J 60 1.500 0.112 3 1.5559
## 48 J 70 2.010 0.555 3 2.1084
## 49 J 80 2.595 0.013 3 2.6609
## 50 J 90 3.326 0.289 3 3.2134
ggplot(dta, aes(Loudness, fit1, group = Subject, alpha = Subject))+
geom_path() +
geom_point(aes(Loudness, Length, group = Subject, alpha = Subject)) +
coord_cartesian(ylim = c(-1, 5)) +
labs(x = "Loudness (dB)", y = "Average log(Length)") +
theme(legend.position = "NONE")## Warning: Using alpha for a discrete variable is not advised.
## The end
## 'data.frame': 4011 obs. of 2 variables:
## $ Age : num 14.74 1.76 15.38 12.76 15.73 ...
## $ Weight: num 85 12.2 59.4 38.6 47.9 ...
indta <- read.table("https://gattonweb.uky.edu/sheather/book/docs/datasets/boxoffice.txt", header = T)
str(indta)## 'data.frame': 32 obs. of 2 variables:
## $ GrossBoxOffice: num 95.3 86.4 119.4 124.4 154.2 ...
## $ year : int 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 ...
indta4<-mutate(indta,Year_75=1:32) #1976~2007共32
inclass4<-rename(indta4,Box_Office = GrossBoxOffice)
head(inclass4)## Box_Office year Year_75
## 1 95.3 1976 1
## 2 86.4 1977 2
## 3 119.4 1978 3
## 4 124.4 1979 4
## 5 154.2 1980 5
## 6 174.3 1981 6
Box_office_t = β0 + β1 × Year_75t + εt, t = 1, …, 32, where εt ~ N(0, σ).
##
## Call:
## lm(formula = Box_Office ~ Year_75, data = inclass4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.38 -79.20 6.08 62.26 121.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -55.93 28.03 -2.0 0.055
## Year_75 29.53 1.48 19.9 <2e-16
##
## Residual standard error: 77.4 on 30 degrees of freedom
## Multiple R-squared: 0.93, Adjusted R-squared: 0.927
## F-statistic: 397 on 1 and 30 DF, p-value: <2e-16
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -55.931 28.0344 -1.9951 5.5186e-02
## Year_75 29.534 1.4827 19.9194 7.5559e-19
Generalized least squares #lecture note p.15
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
| Value | Std.Error | t-value | p-value | |
|---|---|---|---|---|
| (Intercept) | -55.931 | 28.0344 | -1.9951 | 0.05519 |
| Year_75 | 29.534 | 1.4827 | 19.9194 | 0.00000 |
Residual correlations
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
inclass4 <- inclass4 %>%
mutate(res0=resid(m1g,
type="normalized"))
acf(resid(m1g, type = "normalized"), main = "LM", ylim = c(-1, 1))##
## Durbin-Watson test
##
## data: res0 ~ year
## DW = 0.248, p-value = 2.3e-13
## alternative hypothesis: true autocorrelation is greater than 0
Model: Box_office_t = β0 + β1 × Year_75t + εt, t = 1, …, 32, where εt = εt-1 + νt, and νt ~ N(0, σ).
m2 <- gls(Box_Office ~ Year_75, data = inclass4, method = "ML",correlation=corAR1(form = ~ Year_75))
summary(m2)## Generalized least squares fit by maximum likelihood
## Model: Box_Office ~ Year_75
## Data: inclass4
## AIC BIC logLik
## 330.39 336.25 -161.19
##
## Correlation Structure: AR(1)
## Formula: ~Year_75
## Parameter estimate(s):
## Phi
## 0.87821
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 4.5141 72.744 0.0621 0.9509
## Year_75 27.0754 3.448 7.8533 0.0000
##
## Correlation:
## (Intr)
## Year_75 -0.782
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.934208 -1.385920 0.018223 0.332026 1.542698
##
## Residual standard error: 76.165
## Degrees of freedom: 32 total; 30 residual
inclass4 <- inclass4 %>%
mutate(fit1 = fitted(m2))
ggplot(inclass4, aes(Year_75, Box_Office)) +
geom_point(aes(Year_75, Box_Office), color = "grey") +
geom_path(aes(Year_75, fit1), col = "grey", size =rel(1))+
stat_smooth(method = "lm", se = F, size = rel(1), lty = 2, color = "blue")+
labs(x = "Year since 1975", y = "Gross Box Office (in million $)") +
theme(legend.position = "NONE")+
theme_bw()## `geom_smooth()` using formula 'y ~ x'
inclass4m2 <- inclass4 %>%
mutate(res1 = resid(m2, type = "normalized"))
ggplot(inclass4m2, aes(Year_75, res1)) +
geom_point(aes(Year_75, res0), color = "skyblue") +
geom_point(aes(Year_75, res1), color = "grey70")+
geom_hline(yintercept = 0, size = rel(1), color = "grey60")+
labs(x = "Year since 1975", y = "Standarized Residuals") +
theme(legend.position = "NONE")+
theme_bw()##
## Durbin-Watson test
##
## data: res1 ~ year
## DW = 2.2, p-value = 0.65
## alternative hypothesis: true autocorrelation is greater than 0
##從白天嘗試到黑夜~