Import libraries
library(tidyverse)
library(gridExtra)
Load dataset
data <- anscombe
head(data, 10)
Descriptive Analysis
summary(data[,c(1,2,3,4)])
x1 x2 x3 x4
Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8
1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8
Median : 9.0 Median : 9.0 Median : 9.0 Median : 8
Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9
3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8
Max. :14.0 Max. :14.0 Max. :14.0 Max. :19
summary(data[,c(5,6,7,8)])
y1 y2 y3 y4
Min. : 4.260 Min. :3.100 Min. : 5.39 Min. : 5.250
1st Qu.: 6.315 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
Median : 7.580 Median :8.140 Median : 7.11 Median : 7.040
Mean : 7.501 Mean :7.501 Mean : 7.50 Mean : 7.501
3rd Qu.: 8.570 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
Max. :10.840 Max. :9.260 Max. :12.74 Max. :12.500
sapply(data, sd)
x1 x2 x3 x4 y1 y2 y3 y4
3.316625 3.316625 3.316625 3.316625 2.031568 2.031657 2.030424 2.030579
Correlation Coefficient
cor(data$x1, data$y1)
[1] 0.8164205
cor(data$x2, data$y2)
[1] 0.8162365
cor(data$x3, data$y3)
[1] 0.8162867
cor(data$x4, data$y4)
[1] 0.8165214
Scatter Plots
p1 <- ggplot(data = data, aes(x=x1, y=y1)) +
geom_point(size=2)
p2 <- ggplot(data = data, aes(x=x2, y=y2)) +
geom_point(size=2)
p3 <- ggplot(data = data, aes(x=x3, y=y3)) +
geom_point(size=2)
p4 <- ggplot(data = data, aes(x=x4, y=y4)) +
geom_point(size=2)
grid.arrange(p1,p2,p3,p4, nrow=2, ncol=2)

Histograms
p1 <- ggplot(data = data, aes(x=x1)) +
geom_histogram(bins=5)
p2 <- ggplot(data = data, aes(x=x2)) +
geom_histogram(bins=5)
p3 <- ggplot(data = data, aes(x=x3)) +
geom_histogram(bins=5)
p4 <- ggplot(data = data, aes(x=x4)) +
geom_histogram(bins=5)
grid.arrange(p1,p2,p3,p4, nrow=2, ncol=2)

p1 <- ggplot(data = data, aes(x=y1)) +
geom_histogram(bins=5)
p2 <- ggplot(data = data, aes(x=y2)) +
geom_histogram(bins=5)
p3 <- ggplot(data = data, aes(x=y3)) +
geom_histogram(bins=5)
p4 <- ggplot(data = data, aes(x=y4)) +
geom_histogram(bins=5)
grid.arrange(p1,p2,p3,p4, nrow=2, ncol=2)

Regression Analysis
m1 <- lm(y1 ~ x1, data = data)
summary(m1)
Call:
lm(formula = y1 ~ x1, data = data)
Residuals:
Min 1Q Median 3Q Max
-1.92127 -0.45577 -0.04136 0.70941 1.83882
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.0001 1.1247 2.667 0.02573 *
x1 0.5001 0.1179 4.241 0.00217 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.237 on 9 degrees of freedom
Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
m2 <- lm(y2 ~ x2, data = data)
summary(m2)
Call:
lm(formula = y2 ~ x2, data = data)
Residuals:
Min 1Q Median 3Q Max
-1.9009 -0.7609 0.1291 0.9491 1.2691
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.001 1.125 2.667 0.02576 *
x2 0.500 0.118 4.239 0.00218 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.237 on 9 degrees of freedom
Multiple R-squared: 0.6662, Adjusted R-squared: 0.6292
F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002179
m3 <- lm(y3 ~ x3, data = data)
summary(m3)
Call:
lm(formula = y3 ~ x3, data = data)
Residuals:
Min 1Q Median 3Q Max
-1.1586 -0.6146 -0.2303 0.1540 3.2411
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.0025 1.1245 2.670 0.02562 *
x3 0.4997 0.1179 4.239 0.00218 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.236 on 9 degrees of freedom
Multiple R-squared: 0.6663, Adjusted R-squared: 0.6292
F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002176
m4 <- lm(y4 ~ x4, data = data)
summary(m4)
Call:
lm(formula = y4 ~ x4, data = data)
Residuals:
Min 1Q Median 3Q Max
-1.751 -0.831 0.000 0.809 1.839
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.0017 1.1239 2.671 0.02559 *
x4 0.4999 0.1178 4.243 0.00216 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.236 on 9 degrees of freedom
Multiple R-squared: 0.6667, Adjusted R-squared: 0.6297
F-statistic: 18 on 1 and 9 DF, p-value: 0.002165
p1 <- ggplot(data = data, aes(x=x1, y=y1)) +
geom_point(size=2) + geom_smooth(method='lm', formula = y~x)
p2 <- ggplot(data = data, aes(x=x2, y=y2)) +
geom_point(size=2) + geom_smooth(method='lm', formula = y~x)
p3 <- ggplot(data = data, aes(x=x3, y=y3)) +
geom_point(size=2) + geom_smooth(method='lm', formula = y~x)
p4 <- ggplot(data = data, aes(x=x4, y=y4)) +
geom_point(size=2) + geom_smooth(method='lm', formula = y~x)
grid.arrange(p1,p2,p3,p4, nrow=2, ncol=2)

Residual Diagnostics
par(mfrow=c(2,2))
plot(m1,1)
plot(m1,2)
plot(m1,3)
plot(m1,4)

par(mfrow=c(2,2))
plot(m2,1)
plot(m2,2)
plot(m2,3)
plot(m2,4)

par(mfrow=c(2,2))
plot(m3,1)
plot(m3,2)
plot(m3,3)
plot(m3,4)

par(mfrow=c(2,2))
plot(m4,1)
plot(m4,2)
Warning: not plotting observations with leverage one:
8
plot(m4,3)
Warning: not plotting observations with leverage one:
8
plot(m4,4)

Residual Normality Test
shapiro.test(m1$residuals)
Shapiro-Wilk normality test
data: m1$residuals
W = 0.94211, p-value = 0.5456
shapiro.test(m2$residuals)
Shapiro-Wilk normality test
data: m2$residuals
W = 0.87615, p-value = 0.09295
shapiro.test(m3$residuals)
Shapiro-Wilk normality test
data: m3$residuals
W = 0.74073, p-value = 0.001574
shapiro.test(m4$residuals)
Shapiro-Wilk normality test
data: m4$residuals
W = 0.96067, p-value = 0.78
LS0tCnRpdGxlOiAiQW5zY29tYmUgRGF0YXNldCBBbmFseXNpcyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgdG9jX2NvbGxhcHNlZDogdHJ1ZQotLS0KCiMjIEltcG9ydCBsaWJyYXJpZXMKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShncmlkRXh0cmEpCmBgYAoKIyMgTG9hZCBkYXRhc2V0CmBgYHtyfQpkYXRhIDwtIGFuc2NvbWJlCmhlYWQoZGF0YSwgMTApCmBgYAoKIyMgRGVzY3JpcHRpdmUgQW5hbHlzaXMKYGBge3J9CnN1bW1hcnkoZGF0YVssYygxLDIsMyw0KV0pCmBgYAoKYGBge3J9CnN1bW1hcnkoZGF0YVssYyg1LDYsNyw4KV0pCmBgYAoKYGBge3J9CiMgU3RhbmRhcmQgZGV2aWF0aW9uCgpzYXBwbHkoZGF0YSwgc2QpCmBgYAoKCiMjIENvcnJlbGF0aW9uIENvZWZmaWNpZW50CmBgYHtyfQpjb3IoZGF0YSR4MSwgZGF0YSR5MSkKY29yKGRhdGEkeDIsIGRhdGEkeTIpCmNvcihkYXRhJHgzLCBkYXRhJHkzKQpjb3IoZGF0YSR4NCwgZGF0YSR5NCkKYGBgCgojIyBTY2F0dGVyIFBsb3RzCgpgYGB7cn0KcDEgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14MSwgeT15MSkpICsgCiAgICAgICAgZ2VvbV9wb2ludChzaXplPTIpCnAyIDwtIGdncGxvdChkYXRhID0gZGF0YSwgYWVzKHg9eDIsIHk9eTIpKSArIAogICAgICAgIGdlb21fcG9pbnQoc2l6ZT0yKQpwMyA8LSBnZ3Bsb3QoZGF0YSA9IGRhdGEsIGFlcyh4PXgzLCB5PXkzKSkgKyAKICAgICAgICBnZW9tX3BvaW50KHNpemU9MikKcDQgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14NCwgeT15NCkpICsgCiAgICAgICAgZ2VvbV9wb2ludChzaXplPTIpCmdyaWQuYXJyYW5nZShwMSxwMixwMyxwNCwgbnJvdz0yLCBuY29sPTIpCmBgYAoKIyMgSGlzdG9ncmFtcwpgYGB7cn0KcDEgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14MSkpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKcDIgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14MikpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKcDMgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14MykpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKcDQgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14NCkpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKZ3JpZC5hcnJhbmdlKHAxLHAyLHAzLHA0LCBucm93PTIsIG5jb2w9MikKYGBgCgpgYGB7cn0KcDEgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD15MSkpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKcDIgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD15MikpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKcDMgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD15MykpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKcDQgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD15NCkpICsgCiAgICAgICAgZ2VvbV9oaXN0b2dyYW0oYmlucz01KSAKZ3JpZC5hcnJhbmdlKHAxLHAyLHAzLHA0LCBucm93PTIsIG5jb2w9MikKYGBgCgojIyBSZWdyZXNzaW9uIEFuYWx5c2lzCmBgYHtyfQptMSA8LSBsbSh5MSB+IHgxLCBkYXRhID0gZGF0YSkKc3VtbWFyeShtMSkKYGBgCgpgYGB7cn0KbTIgPC0gbG0oeTIgfiB4MiwgZGF0YSA9IGRhdGEpCnN1bW1hcnkobTIpCmBgYAoKYGBge3J9Cm0zIDwtIGxtKHkzIH4geDMsIGRhdGEgPSBkYXRhKQpzdW1tYXJ5KG0zKQpgYGAKCmBgYHtyfQptNCA8LSBsbSh5NCB+IHg0LCBkYXRhID0gZGF0YSkKc3VtbWFyeShtNCkKYGBgCgpgYGB7cn0KcDEgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14MSwgeT15MSkpICsgCiAgICAgICAgZ2VvbV9wb2ludChzaXplPTIpICsgZ2VvbV9zbW9vdGgobWV0aG9kPSdsbScsIGZvcm11bGEgPSB5fngpCnAyIDwtIGdncGxvdChkYXRhID0gZGF0YSwgYWVzKHg9eDIsIHk9eTIpKSArIAogICAgICAgIGdlb21fcG9pbnQoc2l6ZT0yKSArIGdlb21fc21vb3RoKG1ldGhvZD0nbG0nLCBmb3JtdWxhID0geX54KQpwMyA8LSBnZ3Bsb3QoZGF0YSA9IGRhdGEsIGFlcyh4PXgzLCB5PXkzKSkgKyAKICAgICAgICBnZW9tX3BvaW50KHNpemU9MikgKyBnZW9tX3Ntb290aChtZXRob2Q9J2xtJywgZm9ybXVsYSA9IHl+eCkKcDQgPC0gZ2dwbG90KGRhdGEgPSBkYXRhLCBhZXMoeD14NCwgeT15NCkpICsgCiAgICAgICAgZ2VvbV9wb2ludChzaXplPTIpICsgZ2VvbV9zbW9vdGgobWV0aG9kPSdsbScsIGZvcm11bGEgPSB5fngpCmdyaWQuYXJyYW5nZShwMSxwMixwMyxwNCwgbnJvdz0yLCBuY29sPTIpCmBgYAoKIyMgUmVzaWR1YWwgRGlhZ25vc3RpY3MKYGBge3J9CnBhcihtZnJvdz1jKDIsMikpCnBsb3QobTEsMSkKcGxvdChtMSwyKQpwbG90KG0xLDMpCnBsb3QobTEsNCkKYGBgCgpgYGB7cn0KcGFyKG1mcm93PWMoMiwyKSkKcGxvdChtMiwxKQpwbG90KG0yLDIpCnBsb3QobTIsMykKcGxvdChtMiw0KQpgYGAKCmBgYHtyfQpwYXIobWZyb3c9YygyLDIpKQpwbG90KG0zLDEpCnBsb3QobTMsMikKcGxvdChtMywzKQpwbG90KG0zLDQpCmBgYAoKYGBge3J9CnBhcihtZnJvdz1jKDIsMikpCnBsb3QobTQsMSkKcGxvdChtNCwyKQpwbG90KG00LDMpCnBsb3QobTQsNCkKYGBgCgojIyBSZXNpZHVhbCBOb3JtYWxpdHkgVGVzdApgYGB7cn0Kc2hhcGlyby50ZXN0KG0xJHJlc2lkdWFscykKYGBgCgpgYGB7cn0Kc2hhcGlyby50ZXN0KG0yJHJlc2lkdWFscykKYGBgCgpgYGB7cn0Kc2hhcGlyby50ZXN0KG0zJHJlc2lkdWFscykKYGBgCgpgYGB7cn0Kc2hhcGlyby50ZXN0KG00JHJlc2lkdWFscykKYGBgCgoKCgoKCg==