The objective of this problem set is to orient you to a number of activities in R and to conduct a thoughtful exercise in appreciating the importance of data visualization. For each question, create a code chunk or text response that completes/answers the activity or question requested. Finally, upon completion, name your final output .html file as: YourName_ANLY512-Section-Year-Semester.html and upload it to the “Problem Set 2” assignment on Moodle.
Load the anscombe data that is part of the library(datasets) in R, and assign that data to a new object called data.
library(datasets)
# Load Anscombe's quartet from the datasets package and keep a working copy
# under the name `data`, which is what the exercise text asks for.
data("anscombe")
data <- anscombe
# Print the full table so the four x/y pairs can be inspected directly.
print(data)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
(Hint: use the fBasics() package!)
summary(anscombe)
## x1 x2 x3 x4
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19
## y1 y2 y3 y4
## Min. : 4.260 Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.: 6.315 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median : 7.580 Median :8.140 Median : 7.11 Median : 7.040
## Mean : 7.501 Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.: 8.570 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :10.840 Max. :9.260 Max. :12.74 Max. :12.500
# Correlation within each pair: column i (x_i) against column i + 4 (y_i).
# vapply() guarantees exactly one numeric value per pair; the result is stored
# so it can be reused and then auto-printed.
pair_cor <- vapply(
  1:4,
  function(i) cor(anscombe[[i]], anscombe[[i + 4]]),
  numeric(1)
)
pair_cor
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
# Sample variance of each y column (columns 5 to 8 of anscombe).
# vapply() guarantees exactly one numeric value per column.
y_var <- vapply(5:8, function(j) var(anscombe[[j]]), numeric(1))
y_var
## [1] 4.127269 4.127629 4.122620 4.123249
# Kept for compatibility: code that refers to x1..x4 / y1..y4 without a
# data-frame prefix relies on anscombe being on the search path.
attach(anscombe)

# One full-size scatterplot per x/y pair. The columns are looked up in
# anscombe explicitly, so these plots do not depend on attach().
pair_labels <- c("First", "Second", "Third", "Fourth")
for (i in seq_along(pair_labels)) {
  x_name <- paste0("x", i)
  y_name <- paste0("y", i)
  plot(
    anscombe[[x_name]], anscombe[[y_name]],
    main = paste("Scatterplot", pair_labels[i], "pair"),
    xlab = x_name, ylab = y_name, pch = 19
  )
}
# Draw the four scatterplots on one page as a 2 x 2 grid. par() returns the
# previous settings; they are restored afterwards so that later plots are not
# squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
panel_labels <- c("First", "Second", "Third", "Fourth")
for (i in seq_along(panel_labels)) {
  x_name <- paste0("x", i)
  y_name <- paste0("y", i)
  plot(
    anscombe[[x_name]], anscombe[[y_name]],
    main = paste("Scatterplot", panel_labels[i], "pair"),
    xlab = x_name, ylab = y_name, pch = 19
  )
}
par(old_par)
lm() function.
lm(formula = x1 ~ y1, data = anscombe)
##
## Call:
## lm(formula = x1 ~ y1, data = anscombe)
##
## Coefficients:
## (Intercept) y1
## -0.9975 1.3328
# Regress x2 on y2 (x2 is the response) and auto-print the fitted coefficients.
lm(x2 ~ y2, data = anscombe)
##
## Call:
## lm(formula = x2 ~ y2, data = anscombe)
##
## Coefficients:
## (Intercept) y2
## -0.9948 1.3325
# Regress x3 on y3 (x3 is the response) and auto-print the fitted coefficients.
lm(x3 ~ y3, data = anscombe)
##
## Call:
## lm(formula = x3 ~ y3, data = anscombe)
##
## Coefficients:
## (Intercept) y3
## -1.000 1.333
# Regress x4 on y4 (x4 is the response) and auto-print the fitted coefficients.
lm(x4 ~ y4, data = anscombe)
##
## Call:
## lm(formula = x4 ~ y4, data = anscombe)
##
## Coefficients:
## (Intercept) y4
## -1.004 1.334
# Fit each pair with x as the response, the same direction as the standalone
# lm() calls earlier in this file.
# NOTE(review): the usual convention for these plots would be y ~ x; the
# direction is left unchanged so mod1..mod4 keep their meaning for later code.
mod1 <- lm(x1 ~ y1, data = anscombe)
mod2 <- lm(x2 ~ y2, data = anscombe)
mod3 <- lm(x3 ~ y3, data = anscombe)
mod4 <- lm(x4 ~ y4, data = anscombe)

# Convert a fit of the form x = a + b * y into the intercept and slope of the
# same line written as y = -a / b + x / b. abline() interprets its intercept
# and slope in plot coordinates (vertical axis as a function of horizontal
# axis), so passing the x ~ y coefficients directly would draw the wrong line.
line_on_xy_plot <- function(fit) {
  co <- unname(coef(fit))
  c(intercept = -co[1] / co[2], slope = 1 / co[2])
}

# Scatterplot of pair `i` with the fitted line from `fit` overlaid.
plot_pair_with_fit <- function(i, fit, label) {
  x_name <- paste0("x", i)
  y_name <- paste0("y", i)
  plot(
    anscombe[[x_name]], anscombe[[y_name]],
    main = paste("Scatterplot", label, "pair"),
    xlab = x_name, ylab = y_name, pch = 19
  )
  fitted_line <- line_on_xy_plot(fit)
  abline(a = fitted_line[["intercept"]], b = fitted_line[["slope"]])
}

plot_pair_with_fit(1, mod1, "First")
plot_pair_with_fit(2, mod2, "Second")
plot_pair_with_fit(3, mod3, "Third")
plot_pair_with_fit(4, mod4, "Fourth")
# ANOVA table for the first fit. No `test` argument is passed: with a single
# lm fit anova() reports the F test (F value, Pr(>F)), so test = "Chisq"
# would have no effect on the table.
anova(mod1)
## Analysis of Variance Table
##
## Response: x1
##           Df Sum Sq Mean Sq F value  Pr(>F)
## y1         1  73.32  73.320   17.99 0.00217 **
## Residuals  9  36.68   4.076
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the second fit. No `test` argument is passed: with a single
# lm fit anova() reports the F test (F value, Pr(>F)), so test = "Chisq"
# would have no effect on the table.
anova(mod2)
## Analysis of Variance Table
##
## Response: x2
##           Df Sum Sq Mean Sq F value   Pr(>F)
## y2         1 73.287  73.287  17.966 0.002179 **
## Residuals  9 36.713   4.079
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the third fit. No `test` argument is passed: with a single
# lm fit anova() reports the F test (F value, Pr(>F)), so test = "Chisq"
# would have no effect on the table.
anova(mod3)
## Analysis of Variance Table
##
## Response: x3
##           Df Sum Sq Mean Sq F value   Pr(>F)
## y3         1 73.296  73.296  17.972 0.002176 **
## Residuals  9 36.704   4.078
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the fourth fit. No `test` argument is passed: with a single
# lm fit anova() reports the F test (F value, Pr(>F)), so test = "Chisq"
# would have no effect on the table.
anova(mod4)
## Analysis of Variance Table
##
## Response: x4
##           Df Sum Sq Mean Sq F value   Pr(>F)
## y4         1 73.338  73.338  18.003 0.002165 **
## Residuals  9 36.662   4.074
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1