Edit the R script with your own comments to explain what each code chunk does and then compile a report of the script in html for publication to moodle.
## set some options
options(digits=3, show.signif.stars=FALSE)
## load packages
pacman::p_load(alr4, tidyverse)
data(UN11, package="alr4")
## seed the random number generator to get the same sample
set.seed(6102)
## pick 81 countries from three regions
## arrange the rows by alphabetical order
dta <- UN11 %>%
filter(region %in% c("Africa", "Asia", "Europe")) %>%
sample_n(81) %>%
arrange(region)
## first 6 lines of data frame
head(dta)
region group fertility ppgdp lifeExpF pctUrban
Ghana Africa africa 3.99 1333 65.8 52
Seychelles Africa africa 2.34 11451 78.0 56
Gabon Africa africa 3.19 12469 64.3 86
Libya Africa africa 2.41 11321 77.9 78
Benin Africa africa 5.08 741 58.7 42
Burkina Faso Africa africa 5.75 520 57.0 27
## data dimensions - rows and columns
dim(dta)
[1] 81 6
R3 <- table(dta$region)
w <- R3/table(UN11$region)
dta$wt <- rep(1/w[w != 0], R3[R3 != 0])
summary(m0 <- lm(fertility ~ log(ppgdp), data=dta))
Call:
lm(formula = fertility ~ log(ppgdp), data = dta)
Residuals:
Min 1Q Median 3Q Max
-2.2686 -0.7716 0.0497 0.6811 2.6292
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.313 0.575 14.46 < 2e-16
log(ppgdp) -0.652 0.068 -9.58 7.2e-15
Residual standard error: 1.07 on 79 degrees of freedom
Multiple R-squared: 0.537, Adjusted R-squared: 0.532
F-statistic: 91.8 on 1 and 79 DF, p-value: 7.15e-15
The results revelaed by simple linear regression of log-GDP to fertility is significant (p < .001) with Beta = -0.652.
summary(m1 <- update(m0, weights=wt))
Call:
lm(formula = fertility ~ log(ppgdp), data = dta, weights = wt)
Weighted Residuals:
Min 1Q Median 3Q Max
-3.031 -1.001 0.063 0.921 3.425
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.210 0.577 14.22 < 2e-16
log(ppgdp) -0.642 0.068 -9.44 1.3e-14
Residual standard error: 1.42 on 79 degrees of freedom
Multiple R-squared: 0.53, Adjusted R-squared: 0.524
F-statistic: 89.1 on 1 and 79 DF, p-value: 1.35e-14
After weighting the percentage of countried from each region, the results is still significant (p < .001) with Beta = -0.642.
ggplot(dta,
aes(log(ppgdp), fertility, label=region)) +
stat_smooth(method="lm", formula=y ~ x, se=F, col="peru", lwd=rel(.5)) +
stat_smooth(aes(weight=wt), method="lm", formula=y ~ x, se=F, lwd=rel(.5), col="gray")+
geom_text(check_overlap=TRUE, size=rel(2.3), aes(color=region))+
labs(x="GDP (US$ in log unit)",
y="Number of children per woman") +
theme_minimal() +
theme(legend.position="NONE")
The panel shows that GDP is negatively associated with fertility.
Replicate the analysis in the lexical decision latencies data example using the data:heid{languageR}. Perform a generalized least-squares regression and compare the results against an ordinary regression of RT onto BaseFrequency. You will need to augument the data with a variable for trial:
heid\(Trial <- as.numeric(unlist(apply(as.matrix(table(heid\)Subject)), 1, seq)))
# load packages
pacman::p_load(languageR, lattice, tidyverse, nlme, lmtest)
# load data
data(heid, package="languageR")
# see first 6 lines of data
head(heid)
Subject Word RT BaseFrequency
1 pp1 basaalheid 6.69 3.56
2 pp1 markantheid 6.81 5.16
3 pp1 ontroerdheid 6.51 5.55
4 pp1 contentheid 6.58 4.50
5 pp1 riantheid 6.86 4.53
6 pp1 tembaarheid 6.35 0.00
# check data structure
str(heid)
'data.frame': 832 obs. of 4 variables:
$ Subject : Factor w/ 26 levels "pp1","pp10","pp11",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Word : Factor w/ 40 levels "aftandsheid",..: 4 24 28 10 33 38 25 5 16 30 ...
$ RT : num 6.69 6.81 6.51 6.58 6.86 6.35 6.69 7.17 6.58 6.98 ...
$ BaseFrequency: num 3.56 5.16 5.55 4.5 4.53 0 1.61 3.61 2.56 5.86 ...
# select a subset of variables for current example
dta <- heid
dta$Trial <- as.numeric(unlist(apply(as.matrix(table(dta$Subject)), 1, seq)))
# set plot theme
ot <- theme_set(theme_bw())
# word frequency effect
ggplot(data = dta, aes(x = BaseFrequency, y = RT)) +
geom_point() +
stat_smooth(method="lm") +
labs(x = "Base Frequency", y = "Reaction Time (transformed)")
# trial effect by subject
ggplot(data = dta, aes(x = Trial, y = RT)) +
geom_point(pch =1, size = rel(.5)) +
geom_line() +
facet_wrap(~ Subject) +
labs(x = "Trial ID", y = "Reaction Time (transformed)")
acf.fnc(dta, group = "Subject", time = "Trial", x = "RT", plot = TRUE)
summary(m0 <- lm(RT ~ BaseFrequency, data = dta))
Call:
lm(formula = RT ~ BaseFrequency, data = dta)
Residuals:
Min 1Q Median 3Q Max
-0.5166 -0.2158 -0.0525 0.1599 0.8903
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.6417 0.0207 320.39 <2e-16
BaseFrequency -0.0155 0.0047 -3.29 0.001
Residual standard error: 0.282 on 830 degrees of freedom
Multiple R-squared: 0.0129, Adjusted R-squared: 0.0117
F-statistic: 10.8 on 1 and 830 DF, p-value: 0.00104
summary(m0a <- gls(RT ~ BaseFrequency, data = dta, method = "ML"))
Generalized least squares fit by maximum likelihood
Model: RT ~ BaseFrequency
Data: dta
AIC BIC logLik
258 273 -126
Coefficients:
Value Std.Error t-value p-value
(Intercept) 6.64 0.0207 320 0.000
BaseFrequency -0.02 0.0047 -3 0.001
Correlation:
(Intr)
BaseFrequency -0.882
Standardized residuals:
Min Q1 Med Q3 Max
-1.835 -0.766 -0.186 0.568 3.161
Residual standard error: 0.282
Degrees of freedom: 832 total; 830 residual
The results showed that BaseFrequency is significantly associted with RT.
summary(m1 <- update(m0a, correlation = corAR1(form = ~ Trial | Subject)))
Generalized least squares fit by maximum likelihood
Model: RT ~ BaseFrequency
Data: dta
AIC BIC logLik
64.3 83.2 -28.1
Correlation Structure: AR(1)
Formula: ~Trial | Subject
Parameter estimate(s):
Phi
0.462
Coefficients:
Value Std.Error t-value p-value
(Intercept) 6.64 0.02193 302.5 0e+00
BaseFrequency -0.01 0.00387 -3.6 4e-04
Correlation:
(Intr)
BaseFrequency -0.699
Standardized residuals:
Min Q1 Med Q3 Max
-1.835 -0.765 -0.184 0.586 3.159
Residual standard error: 0.281
Degrees of freedom: 832 total; 830 residual
After controlling the effect of trials sequential effect, the results showed that BaseFrequency is still significantly associted with RT.
anova(m0a, m1)
Model df AIC BIC logLik Test L.Ratio p-value
m0a 1 3 258.5 272.6 -126.2
m1 2 4 64.3 83.2 -28.1 1 vs 2 196 <.0001
The two models yield significant different.
dta <- dta %>% mutate(res0 = rstandard(m0),
res1 = resid(m1, type = "normalized"))
dwtest(res0 ~ BaseFrequency, data = dta)
Durbin-Watson test
data: res0 ~ BaseFrequency
DW = 1, p-value <2e-16
alternative hypothesis: true autocorrelation is greater than 0
dwtest(res1 ~ BaseFrequency, data = dta)
Durbin-Watson test
data: res1 ~ BaseFrequency
DW = 2, p-value = 1
alternative hypothesis: true autocorrelation is greater than 0
The results showed that it is important to control the sequential effect of trials.
acf.fnc(dta, group = "Subject", time = "Trial", x = "res1", plot = TRUE)
Replicate the analyses shown in the heights of boys example for the data in the heights of girls . You can ignore the grouping by mothers in the analysis.
# load packages
pacman::p_load(tidyverse, nlme, broom, magrittr, MuMIn)
# load data set
data(Oxboys, package="nlme")
# grouped data structure from nlme
str(Oxboys)
Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 234 obs. of 4 variables:
$ Subject : Ord.factor w/ 26 levels "10"<"26"<"25"<..: 13 13 13 13 13 13 13 13 13 5 ...
$ age : num -1 -0.7479 -0.463 -0.1643 -0.0027 ...
$ height : num 140 143 145 147 148 ...
$ Occasion: Ord.factor w/ 9 levels "1"<"2"<"3"<"4"<..: 1 2 3 4 5 6 7 8 9 1 ...
- attr(*, "formula")=Class 'formula' language height ~ age | Subject
.. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
- attr(*, "labels")=List of 2
..$ y: chr "Height"
..$ x: chr "Centered age"
- attr(*, "units")=List of 1
..$ y: chr "(cm)"
- attr(*, "FUN")=function (x)
..- attr(*, "source")= chr "function (x) max(x, na.rm = TRUE)"
- attr(*, "order.groups")= logi TRUE
plot(Oxboys)
# view first 6 lines
head(Oxboys)
Grouped Data: height ~ age | Subject
Subject age height Occasion
1 1 -1.0000 140 1
2 1 -0.7479 143 2
3 1 -0.4630 145 3
4 1 -0.1643 147 4
5 1 -0.0027 148 5
6 1 0.2466 150 6
# format data from long to wide format first
dta <- as.data.frame(Oxboys) %>%
select(Subject, Occasion, height) %>%
spread(Occasion, height) %>%
select(-Subject)
dta %>% cor
1 2 3 4 5 6 7 8 9
1 1.000 0.996 0.997 0.992 0.987 0.983 0.970 0.962 0.957
2 0.996 1.000 0.998 0.996 0.991 0.989 0.974 0.963 0.959
3 0.997 0.998 1.000 0.997 0.992 0.991 0.977 0.968 0.964
4 0.992 0.996 0.997 1.000 0.996 0.995 0.982 0.975 0.970
5 0.987 0.991 0.992 0.996 1.000 0.997 0.991 0.984 0.979
6 0.983 0.989 0.991 0.995 0.997 1.000 0.993 0.988 0.986
7 0.970 0.974 0.977 0.982 0.991 0.993 1.000 0.995 0.992
8 0.962 0.963 0.968 0.975 0.984 0.988 0.995 1.000 0.998
9 0.957 0.959 0.964 0.970 0.979 0.986 0.992 0.998 1.000
dta %>% var %>% diag
1 2 3 4 5 6 7 8 9
53.5 52.3 57.4 62.5 64.0 68.4 77.3 81.9 87.5
# set plot theme to black-n-white
ot <- theme_set(theme_bw())
# one regression line fits all
ggplot(Oxboys, aes(x = age, y = height, color = Subject)) +
geom_point() +
stat_smooth(aes(group = 1), method = "lm") +
labs(x = "Age (scaled)", y = "Height (cm)") +
theme(legend.position = "NONE")
m0 <- gls(height ~ age, data = Oxboys)
m0
Generalized least squares fit by REML
Model: height ~ age
Data: Oxboys
Log-restricted-likelihood: -819
Coefficients:
(Intercept) age
149.37 6.52
Degrees of freedom: 234 total; 232 residual
Residual standard error: 8.08
m1 <- update(m0, corr = corAR1(form = ~ 1 | Subject), method = "ML")
m1
Generalized least squares fit by maximum likelihood
Model: height ~ age
Data: Oxboys
Log-likelihood: -341
Coefficients:
(Intercept) age
149.73 6.55
Correlation Structure: AR(1)
Formula: ~1 | Subject
Parameter estimate(s):
Phi
0.995
Degrees of freedom: 234 total; 232 residual
Residual standard error: 8.23
The results is very similar to the results of prior model
m2 <- update(m0, weights = varIdent(form = ~ 1 | Occasion))
m2
Generalized least squares fit by REML
Model: height ~ age
Data: Oxboys
Log-restricted-likelihood: -817
Coefficients:
(Intercept) age
149.36 6.46
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | Occasion
Parameter estimates:
1 2 3 4 5 6 7 8 9
1.000 0.986 1.032 1.075 1.087 1.126 1.197 1.235 1.279
Degrees of freedom: 234 total; 232 residual
Residual standard error: 7.23
The coefficient in this model is simliar to the previous model and the log-restricted-likelihood function is -817.17.
m3 <- update(m1, weights = varIdent(form = ~ 1 | Occasion))
m3
Generalized least squares fit by maximum likelihood
Model: height ~ age
Data: Oxboys
Log-likelihood: -321
Coefficients:
(Intercept) age
150.67 6.56
Correlation Structure: AR(1)
Formula: ~1 | Subject
Parameter estimate(s):
Phi
0.996
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | Occasion
Parameter estimates:
1 2 3 4 5 6 7 8 9
1.000 0.989 1.048 1.098 1.115 1.168 1.238 1.262 1.299
Degrees of freedom: 234 total; 232 residual
Residual standard error: 7.17
After controlling the subject’s correlation, the log-likehood function becomes larger than the M2, suggestion that M3 (the model including correction of subject) is better.
model.sel(m0, m1, m2, m3)
Model selection table
(Intrc) age family correlation weights
m3 151 6.56 gaussian(identity) corAR1(~1|Subject) varIdent(~1|Occasion)
m1 150 6.55 gaussian(identity) corAR1(~1|Subject)
m0 149 6.52 gaussian(identity)
m2 149 6.46 gaussian(identity) varIdent(~1|Occasion)
REML df logLik AICc delta weight
m3 FALSE 12 -321 668 0.0 1
m1 FALSE 4 -341 690 22.3 0
m0 3 -819 1644 975.9 0
m2 11 -817 1658 989.4 0
Models ranked by AICc(x)
In this case, M3 is the best model.
# summary model output
lapply(list(m0, m1, m2, m3), summary)
[[1]]
Generalized least squares fit by REML
Model: height ~ age
Data: Oxboys
AIC BIC logLik
1644 1654 -819
Coefficients:
Value Std.Error t-value p-value
(Intercept) 149.4 0.529 283 0
age 6.5 0.817 8 0
Correlation:
(Intr)
age -0.035
Standardized residuals:
Min Q1 Med Q3 Max
-2.6801 -0.6361 0.0603 0.5880 2.3443
Residual standard error: 8.08
Degrees of freedom: 234 total; 232 residual
[[2]]
Generalized least squares fit by maximum likelihood
Model: height ~ age
Data: Oxboys
AIC BIC logLik
690 704 -341
Correlation Structure: AR(1)
Formula: ~1 | Subject
Parameter estimate(s):
Phi
0.995
Coefficients:
Value Std.Error t-value p-value
(Intercept) 149.7 1.605 93.3 0
age 6.5 0.219 29.9 0
Correlation:
(Intr)
age 0
Standardized residuals:
Min Q1 Med Q3 Max
-2.678 -0.668 0.015 0.532 2.256
Residual standard error: 8.23
Degrees of freedom: 234 total; 232 residual
[[3]]
Generalized least squares fit by REML
Model: height ~ age
Data: Oxboys
AIC BIC logLik
1656 1694 -817
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | Occasion
Parameter estimates:
1 2 3 4 5 6 7 8 9
1.000 0.986 1.032 1.075 1.087 1.126 1.197 1.235 1.279
Coefficients:
Value Std.Error t-value p-value
(Intercept) 149.4 0.525 284.5 0
age 6.5 0.817 7.9 0
Correlation:
(Intr)
age 0.135
Standardized residuals:
Min Q1 Med Q3 Max
-2.5132 -0.5901 0.0622 0.6139 2.0557
Residual standard error: 7.23
Degrees of freedom: 234 total; 232 residual
[[4]]
Generalized least squares fit by maximum likelihood
Model: height ~ age
Data: Oxboys
AIC BIC logLik
667 708 -321
Correlation Structure: AR(1)
Formula: ~1 | Subject
Parameter estimate(s):
Phi
0.996
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | Occasion
Parameter estimates:
1 2 3 4 5 6 7 8 9
1.000 0.989 1.048 1.098 1.115 1.168 1.238 1.262 1.299
Coefficients:
Value Std.Error t-value p-value
(Intercept) 150.7 1.347 111.9 0
age 6.6 0.268 24.5 0
Correlation:
(Intr)
age 0.682
Standardized residuals:
Min Q1 Med Q3 Max
-2.6341 -0.7336 -0.0947 0.4401 1.8920
Residual standard error: 7.17
Degrees of freedom: 234 total; 232 residual
acf(resid(m0, type = "normalized"), main = "m0")
acf(resid(m1, type = "normalized"), main = "m1")
acf(resid(m2, type = "normalized"), main = "m2")
acf(resid(m3, type = "normalized"), main = "m3")
BTW, hope to study abroad at Oxford.
The data set concerns student evaluation of instructor’s beauty and teaching quality for several courses at the University of Texas. The teaching evaluatons were done at the end of the semester, and the beauty judgments were made later, by six students who had not attended the classes and were not aware of the course evaluations. Carry out the analysis presented in the R script by commenting on what each code chunk does. You can spin the script to a markdown file for html output.
Source: Hamermesh, D.S., & Parker, A.M. (2005). Beauty in the classroom: instructor’s pulchritude and putative pedagogical productivity.a Economics and Education Review, 24, 369-376. Reported in Gelman, A., & Hill, J. (2006). Data analysis using regression and hierarchical/multilevel models. p. 51.
Column 1: Course evaluation score Column 2: Beauty score Column 3: Gender of professor, 1 = Female, 0 = Male Column 4: Pofessor age in years Column 5: Minority status of professor, 1 = Minority, 0 = Others Column 6: Tenure status of professor, 1 = Tenured, 0 = No Column 7: Course ID
#
dta <- read.table("./data/course_eval.txt", h=T)
#
dta$Course <- as.factor(dta$Course)
xyplot(Score ~ Beauty | Course, data=dta,
ylab="Average course evaluation score",
xlab="Beauty judgment score",
type=c("p", "g", "r"))
dtag <- # create a new copy of the groupedData object
groupedData(Score ~ Beauty | Course,
data=as.data.frame( dta ),
FUN=mean,
labels=list(x="Beauty score",
y="Couse evaluation score" ))
plot(dtag, asp=1)
m0 <- lm(Score ~ Beauty, data=dta)
summary(m0)
Call:
lm(formula = Score ~ Beauty, data = dta)
Residuals:
Min 1Q Median 3Q Max
-1.8002 -0.3630 0.0725 0.4021 1.1037
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0100 0.0255 157.21 < 2e-16
Beauty 0.1330 0.0322 4.13 4.2e-05
Residual standard error: 0.545 on 461 degrees of freedom
Multiple R-squared: 0.0357, Adjusted R-squared: 0.0336
F-statistic: 17.1 on 1 and 461 DF, p-value: 4.25e-05
The results showed that Beauty is postively associated with Course Evaluation scores.
with(dta, plot(Beauty, Score, bty="n",
xlab="Beauty score",
ylab="Average course evaluation"))
grid()
abline(m0)
t.test(coef(lmList(Score ~ Beauty | Course, data = dta))["Beauty"])
One Sample t-test
data: coef(lmList(Score ~ Beauty | Course, data = dta))["Beauty"]
t = 0.5, df = 30, p-value = 0.6
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
-2.19 3.77
sample estimates:
mean of x
0.79
m1 <- lm(Score ~ Course/Beauty - 1, data=dta)
#
summary(m1)
Call:
lm(formula = Score ~ Course/Beauty - 1, data = dta)
Residuals:
Min 1Q Median 3Q Max
-1.8108 -0.2739 0.0135 0.3314 1.0935
Coefficients:
Estimate Std. Error t value Pr(>|t|)
Course0 4.0316 0.0304 132.83 < 2e-16
Course1 3.7049 1.2199 3.04 0.00254
Course2 4.4852 0.4880 9.19 < 2e-16
Course3 3.8806 0.2535 15.31 < 2e-16
Course4 3.8359 0.1207 31.79 < 2e-16
Course5 4.0863 0.3073 13.30 < 2e-16
Course6 4.2195 0.3775 11.18 < 2e-16
Course7 3.4491 0.4324 7.98 1.6e-14
Course8 5.0785 1.0201 4.98 9.5e-07
Course9 3.9284 0.2150 18.27 < 2e-16
Course10 4.8393 1.8830 2.57 0.01053
Course11 4.1322 0.4060 10.18 < 2e-16
Course12 3.3081 0.6667 4.96 1.0e-06
Course13 4.1574 0.6147 6.76 4.8e-11
Course14 3.6171 0.4442 8.14 4.9e-15
Course15 2.4200 0.4187 5.78 1.5e-08
Course16 -16.9734 22.0939 -0.77 0.44280
Course17 4.4669 0.2982 14.98 < 2e-16
Course18 4.2042 0.3147 13.36 < 2e-16
Course19 3.5362 0.2520 14.03 < 2e-16
Course20 4.4540 0.4957 8.99 < 2e-16
Course21 3.6620 0.1425 25.70 < 2e-16
Course22 3.7286 0.2064 18.06 < 2e-16
Course23 3.5310 0.5096 6.93 1.7e-11
Course24 3.7810 0.3552 10.65 < 2e-16
Course25 20.7529 13.6132 1.52 0.12818
Course26 4.2462 0.3138 13.53 < 2e-16
Course27 4.5522 0.6720 6.77 4.5e-11
Course28 3.6051 1.3974 2.58 0.01024
Course29 3.8039 0.3909 9.73 < 2e-16
Course30 4.3076 0.3272 13.17 < 2e-16
Course0:Beauty 0.1462 0.0378 3.87 0.00013
Course1:Beauty 0.9109 1.3378 0.68 0.49632
Course2:Beauty 0.2416 0.8978 0.27 0.78795
Course3:Beauty -0.0458 1.4114 -0.03 0.97412
Course4:Beauty 0.5401 0.2909 1.86 0.06405
Course5:Beauty 0.2275 0.4206 0.54 0.58888
Course6:Beauty 0.7948 0.6393 1.24 0.21453
Course7:Beauty -0.1954 0.4447 -0.44 0.66051
Course8:Beauty 1.8980 1.4103 1.35 0.17913
Course9:Beauty -1.9625 0.7006 -2.80 0.00534
Course10:Beauty 0.5348 1.9240 0.28 0.78117
Course11:Beauty -0.4049 0.5014 -0.81 0.41987
Course12:Beauty -1.9471 1.6706 -1.17 0.24452
Course13:Beauty 0.5055 0.9294 0.54 0.58682
Course14:Beauty -0.1386 0.8917 -0.16 0.87659
Course15:Beauty -0.3753 0.5578 -0.67 0.50140
Course16:Beauty -18.2149 19.1411 -0.95 0.34187
Course17:Beauty 0.3354 0.4471 0.75 0.45349
Course18:Beauty -0.1303 0.4932 -0.26 0.79172
Course19:Beauty -0.3202 0.3246 -0.99 0.32451
Course20:Beauty 0.0692 0.8893 0.08 0.93801
Course21:Beauty -0.0380 0.1891 -0.20 0.84096
Course22:Beauty 0.1641 0.2424 0.68 0.49881
Course23:Beauty -1.3347 0.7649 -1.74 0.08178
Course24:Beauty 0.0756 0.9732 0.08 0.93811
Course25:Beauty 40.5972 32.6559 1.24 0.21453
Course26:Beauty 0.2253 0.3363 0.67 0.50332
Course27:Beauty 0.9815 1.2156 0.81 0.41987
Course28:Beauty 0.7384 0.9699 0.76 0.44693
Course29:Beauty 0.4722 0.2924 1.61 0.10711
Course30:Beauty 0.1544 0.2311 0.67 0.50451
Residual standard error: 0.525 on 401 degrees of freedom
Multiple R-squared: 0.985, Adjusted R-squared: 0.983
F-statistic: 434 on 62 and 401 DF, p-value: <2e-16
After conducting the individual course comparision, the results revealed that beauty did not associate with the course evaluation scores for almost course.
summary(m1)$coef[which(summary(m1)$coef[-c(1:31),4] <0.05),]
Estimate Std. Error t value Pr(>|t|)
Course0 4.03 0.0304 132.8 0.0e+00
Course9 3.93 0.2150 18.3 1.1e-54
Only Course 0 and 9 are still associated with the course evaluation scores.