# Load the NSS data; column 1 is kept as `pre`, column 2 as `post`.
data <- read.table(file = "NSSdata.txt")
pre <- data[[1]]
post <- data[[2]]
# Paired t-test: the two columns are compared row by row.
t.test(x = pre, y = post, paired = TRUE)
Paired t-test
data: pre and post
t = 10.058, df = 142, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.06545084 0.09747364
sample estimates:
mean of the differences
0.08146224
# Two-sample t-test (classical Student form with a pooled variance).
# t.test() defaults to var.equal = FALSE, which is the Welch test, so the
# pooled-variance version has to be requested explicitly; without it this
# call is an exact duplicate of the Welch test run further down.
# NOTE(review): console output pasted after this call was produced by the
# default (Welch) form and needs to be regenerated.
t.test(pre, post, var.equal = TRUE)
Welch Two Sample t-test
data: pre and post
t = 7.3372, df = 264.99, p-value = 2.669e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.05960153 0.10332295
sample estimates:
mean of x mean of y
0.8180357 0.7365734
# Welch test: two-sample t-test that does not assume equal variances.
t.test(x = pre, y = post, var.equal = FALSE)
Welch Two Sample t-test
data: pre and post
t = 7.3372, df = 264.99, p-value = 2.669e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.05960153 0.10332295
sample estimates:
mean of x mean of y
0.8180357 0.7365734
# Plots
# Side-by-side boxplots of the two samples, labelled by year.
boxplot(
  pre, post,
  names = c("2019", "2021"),
  main = "NSS Satisfaction Comparison"
)
# Scatter plot of post against pre, with the line y = x drawn in red.
plot(
  x = pre, y = post,
  xlab = "2019", ylab = "2021",
  main = "Scatter Plot NSS"
)
abline(a = 0, b = 1, col = "red")
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
# Question 2(a)
# Enter the 31 observations of volume, girth and height by hand and collect
# them in one data frame.
volume <- c(
  0.7458, 0.7458, 0.7386, 1.1875, 1.3613, 1.4265, 1.1296, 1.3179,
  1.6365, 1.4410, 1.7524, 1.5206, 1.5496, 1.5424, 1.3831, 1.6075,
  2.4475, 1.9841, 1.8610, 1.8030, 2.4982, 2.2954, 2.6285, 2.7734,
  3.0847, 4.0116, 4.0333, 4.2216, 3.7292, 3.6930, 5.5757
)
girth <- c(
  66.23, 68.62, 70.22, 85.38, 83.79, 86.18, 87.78, 87.78,
  88.57, 89.37, 90.17, 90.97, 90.97, 93.36, 95.76, 102.94,
  102.94, 106.13, 109.32, 110.12, 111.71, 113.31, 115.70, 127.67,
  130.07, 138.05, 139.64, 142.84, 143.63, 143.63, 164.38
)
height <- c(
  21.0, 19.5, 18.9, 21.6, 24.3, 24.9, 19.8, 22.5,
  24.0, 22.5, 23.7, 22.8, 22.8, 20.7, 22.5, 22.2,
  25.5, 25.8, 21.3, 19.2, 23.4, 24.0, 22.2, 21.6,
  23.1, 24.3, 24.6, 24.0, 24.0, 24.0, 26.1
)
data <- data.frame(volume = volume, girth = girth, height = height)

# Question 2(b)
# Multiple linear regression of volume on girth and height.
model <- lm(formula = volume ~ girth + height, data = data)
summary(model)
Call:
lm(formula = volume ~ girth + height, data = data)
Residuals:
Min 1Q Median 3Q Max
-0.47061 -0.19120 -0.02143 0.15448 0.61193
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.241242 0.624123 -6.796 2.21e-07 ***
girth 0.042641 0.002389 17.845 < 2e-16 ***
height 0.084128 0.031302 2.688 0.012 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2807 on 28 degrees of freedom
Multiple R-squared: 0.9481, Adjusted R-squared: 0.9444
F-statistic: 255.8 on 2 and 28 DF, p-value: < 2.2e-16
# Question 2(c)
# Five new (girth, height) pairs at which the fitted model is evaluated.
new <- data.frame(
  girth = seq(from = 65, to = 165, by = 25),
  height = seq(from = 21, to = 25, by = 1)
)
predict(model, newdata = new)
1 2 3 4 5
0.2970858 1.4472299 2.5973739 3.7475180 4.8976620
# Question 2(d)
# Same point predictions, now with prediction-interval bounds (lwr, upr).
predict(model, newdata = new, interval = "prediction")
fit lwr upr
1 0.2970858 -0.3115897 0.9057613
2 1.4472299 0.8592903 2.0351695
3 2.5973739 2.0118607 3.1828871
4 3.7475180 3.1459002 4.3491358
5 4.8976620 4.2628173 5.5325068
# Question 2(e)
# Default diagnostic plots for the fitted model; these include the
# residuals-vs-fitted plot and the normal Q-Q plot.
plot(x = model)
# Question 3(a)
# Read the score data and keep only the rows whose 4th column (stored as
# `regionmissing`) is below 1.
scores <- read.table(file = "ScoreData.txt")
regionmissing <- scores[[4]]
rating <- scores[[1]][regionmissing < 1]
# Question 3(b)
# Columns 2 and 3 are categorical, so they are stored as factors.
product <- factor(scores[[2]][regionmissing < 1])
region <- factor(scores[[3]][regionmissing < 1])
# Question 3(c)
# One-way ANOVA: does the mean rating differ between regions?
model <- lm(formula = rating ~ region)
anova(model)
Analysis of Variance Table
Response: rating
Df Sum Sq Mean Sq F value Pr(>F)
region 8 22.3 2.7847 2.3015 0.01837 *
Residuals 23472 28399.2 1.2099
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Question 3(d)
# Re-read the data, apply the same column-4 filter, and report the mean
# rating within each region.
scores <- read.table(file = "ScoreData.txt")
regionmissing <- scores[[4]]
rating <- scores[[1]][regionmissing < 1]
region <- factor(scores[[3]][regionmissing < 1])
aggregate(rating ~ region, FUN = mean)
############################################################
# QUESTION 4 – FINAL R CODE
############################################################
#############################
# (a) Read EPL data file
#############################
EPL <- read.table("epl.txt", header = FALSE)
#############################
# (b) Convert matrix to score vector
#############################
# Flatten the table row by row (row 1 first, then row 2, ...). as.numeric()
# walks a matrix column by column, so the matrix is transposed first to get
# row-major order. This replaces listing EPL[1, ] ... EPL[20, ] by hand and
# no longer depends on the table having exactly 20 rows.
scores <- as.numeric(t(as.matrix(EPL)))
#############################
# (c) Team names
#############################
# Team labels; the position in this vector is the team number used when
# building the fixture lists.
teamnames <- c(
  "Arsenal", "Villa", "Brentford", "Brighton",
  "Burnley", "Chelsea", "Palace", "Everton",
  "Leeds", "Leicester", "Liverpool", "City",
  "United", "Newcastle", "Norwich", "Southampton",
  "Tottenham", "Watford", "Westham", "Wolves"
)
#############################
# (d) Create fixtures
#############################
# Interleave one team with each of its opponents, team first.
#
# x: a single team name.
# y: vector of opponent names (any length).
# Returns the character vector x, y[1], x, y[2], ..., x, y[n], of length
# 2 * length(y); character(0) when y is empty.
teammix <- function(x, y) {
  n <- length(y)
  fixture <- character(2L * n)
  # Odd positions hold the fixed team, even positions the opponents.
  odd <- seq(from = 1L, by = 2L, length.out = n)
  fixture[odd] <- x
  fixture[odd + 1L] <- y
  fixture
}
# Interleave each opponent with one team, opponent first.
#
# x: a single team name.
# y: vector of opponent names (any length).
# Returns the character vector y[1], x, y[2], x, ..., y[n], x, of length
# 2 * length(y); character(0) when y is empty.
opponentmix <- function(x, y) {
  n <- length(y)
  fixture <- character(2L * n)
  # Odd positions hold the opponents, even positions the fixed team.
  odd <- seq(from = 1L, by = 2L, length.out = n)
  fixture[odd] <- y
  fixture[odd + 1L] <- x
  fixture
}
# "Team" column entries for team number `a`: that team interleaved with
# every other entry of teamnames, the team itself coming first in each pair.
teammix2 <- function(a) {
  club <- teamnames[a]
  rivals <- teamnames[-a]
  teammix(x = club, y = rivals)
}
# "Opponent" column entries for team number `a`: every other entry of
# teamnames interleaved with that team, the other team coming first.
teammix3 <- function(a) {
  club <- teamnames[a]
  rivals <- teamnames[-a]
  opponentmix(x = club, y = rivals)
}
# Build the full "team" and "opponent" columns by concatenating the
# per-team fixture vectors in team-number order. Iterating over
# seq_along(teamnames) replaces twenty hand-written calls per column and
# stays in step with teamnames if the list of teams changes.
team <- unlist(lapply(seq_along(teamnames), teammix2))
opponent <- unlist(lapply(seq_along(teamnames), teammix3))
#############################
# (e) Home advantage variable
#############################
# Alternating 1/0 indicator, 760 entries in total (1 on odd positions).
home <- rep(c(1, 0), times = 380)

#############################
# (f) Response transformation
#############################
# Response is the reciprocal of (1 + score).
yvariable <- 1 / (1 + scores)

#############################
# (g) Convert to factors
#############################
team <- factor(x = team)
opponent <- factor(x = opponent)

#############################
# (h) Fit null model
#############################
# Intercept-only model, used as the baseline.
null.lm <- lm(formula = yvariable ~ 1)

#############################
# (i) Fit full model
#############################
# Adds the home indicator plus team and opponent effects.
full.lm <- lm(formula = yvariable ~ home + team + opponent)

#############################
# (j) Output results
#############################
summary(object = null.lm)
Call:
lm(formula = yvariable ~ 1)
Residuals:
Min 1Q Median 3Q Max
-0.42795 -0.21962 -0.05295 0.44705 0.44705
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.55295 0.01071 51.64 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2952 on 759 degrees of freedom
# Coefficient table and fit statistics for the full model.
summary(object = full.lm)
Call:
lm(formula = yvariable ~ home + team + opponent)
Residuals:
Min 1Q Median 3Q Max
-0.51672 -0.19866 -0.07396 0.21017 0.71047
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.598798 0.063464 9.435 < 2e-16 ***
home -0.050457 0.019560 -2.580 0.010087 *
teamBrentford 0.039282 0.061939 0.634 0.526143
teamBrighton 0.073310 0.061939 1.184 0.236965
teamBurnley 0.129144 0.061939 2.085 0.037419 *
teamChelsea -0.105918 0.061939 -1.710 0.087690 .
teamCity -0.151682 0.061939 -2.449 0.014567 *
teamEverton 0.052523 0.061939 0.848 0.396728
teamLeeds 0.064840 0.061939 1.047 0.295527
teamLeicester -0.058343 0.061939 -0.942 0.346534
teamLiverpool -0.193244 0.061939 -3.120 0.001881 **
teamNewcastle 0.020486 0.061939 0.331 0.740931
teamNorwich 0.218252 0.061939 3.524 0.000452 ***
teamPalace 0.025231 0.061939 0.407 0.683863
teamSouthampton 0.026379 0.061939 0.426 0.670317
teamTottenham -0.025741 0.061939 -0.416 0.677838
teamUnited -0.007917 0.061939 -0.128 0.898332
teamVilla -0.013912 0.061939 -0.225 0.822347
teamWatford 0.150648 0.061939 2.432 0.015250 *
teamWestham -0.031782 0.061939 -0.513 0.608020
teamWolves 0.115625 0.061939 1.867 0.062341 .
opponentBrentford -0.070301 0.061939 -1.135 0.256750
opponentBrighton -0.007106 0.061939 -0.115 0.908688
opponentBurnley -0.054606 0.061939 -0.882 0.378276
opponentChelsea 0.078636 0.061939 1.270 0.204646
opponentCity 0.142455 0.061939 2.300 0.021737 *
opponentEverton -0.093727 0.061939 -1.513 0.130663
opponentLeeds -0.151381 0.061939 -2.444 0.014763 *
opponentLeicester -0.100188 0.061939 -1.618 0.106199
opponentLiverpool 0.140268 0.061939 2.265 0.023832 *
opponentNewcastle -0.077431 0.061939 -1.250 0.211664
opponentNorwich -0.153206 0.061939 -2.474 0.013609 *
opponentPalace -0.003935 0.061939 -0.064 0.949359
opponentSouthampton -0.107133 0.061939 -1.730 0.084120 .
opponentTottenham 0.052593 0.061939 0.849 0.396104
opponentUnited -0.067083 0.061939 -1.083 0.279145
opponentVilla -0.039329 0.061939 -0.635 0.525655
opponentWatford -0.162685 0.061939 -2.627 0.008809 **
opponentWestham -0.070532 0.061939 -1.139 0.255189
opponentWolves 0.005208 0.061939 0.084 0.933009
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2696 on 720 degrees of freedom
Multiple R-squared: 0.2086, Adjusted R-squared: 0.1658
F-statistic: 4.867 on 39 and 720 DF, p-value: < 2.2e-16
#############################
# (k) Compare models
#############################
# F-test of the full model against the intercept-only model.
anova(object = null.lm, full.lm)
Analysis of Variance Table
Model 1: yvariable ~ 1
Model 2: yvariable ~ home + team + opponent
Res.Df RSS Df Sum of Sq F Pr(>F)
1 759 66.135
2 720 52.337 39 13.798 4.8673 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#############################
# (l) Diagnostics plots
#############################
# Draw the lm diagnostic plots in a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards so that later plots are not
# silently squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
plot(full.lm)
par(old_par)
#############################
# (m) Team coefficients
#############################
# Prints every estimated coefficient of the full model (intercept, home,
# team and opponent terms), not only the team ones.
coefficients(full.lm)
(Intercept) home teamBrentford
0.598798037 -0.050457393 0.039282407
teamBrighton teamBurnley teamChelsea
0.073310185 0.129143519 -0.105917659
teamCity teamEverton teamLeeds
-0.151681548 0.052523148 0.064839616
teamLeicester teamLiverpool teamNewcastle
-0.058343254 -0.193244048 0.020486111
teamNorwich teamPalace teamSouthampton
0.218252315 0.025231481 0.026378968
teamTottenham teamUnited teamVilla
-0.025740741 -0.007916667 -0.013912037
teamWatford teamWestham teamWolves
0.150648148 -0.031782407 0.115625000
opponentBrentford opponentBrighton opponentBurnley
-0.070300926 -0.007106481 -0.054606481
opponentChelsea opponentCity opponentEverton
0.078635913 0.142455357 -0.093726852
opponentLeeds opponentLeicester opponentLiverpool
-0.151380622 -0.100188492 0.140267857
opponentNewcastle opponentNorwich opponentPalace
-0.077430556 -0.153206019 -0.003935185
opponentSouthampton opponentTottenham opponentUnited
-0.107132937 0.052592593 -0.067083333
opponentVilla opponentWatford opponentWestham
-0.039328704 -0.162685185 -0.070532407
opponentWolves
0.005208333