Author: 235208 Alias: Atonwind
The objectives of this problem set are to orient you to a number of activities in R and to conduct a thoughtful exercise in appreciating the importance of data visualization. For each question, create a code chunk or text response that completes/answers the activity or question requested. Finally, upon completion, name your final output .html file as: YourName_ANLY512-Section-Year-Semester.html and upload it to the “Problem Set 2” assignment on Moodle.
anscombe data that is part of the library(datasets) in R, and assign that data to a new object called data.
# place the code here
# Load Anscombe's quartet from the built-in datasets package and keep a
# working copy under the name the assignment asks for (`data`).
library(datasets)
data("anscombe")
# Use `<-` for assignment; `=` is reserved for naming function arguments.
data <- anscombe
# Print the full table (11 rows, columns x1..x4 and y1..y4).
data
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
fBasics() package!)
# place the code here
# Per-column minimum, quartiles, median, mean and maximum of the data frame.
summary(data)
## x1 x2 x3 x4
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19
## y1 y2 y3 y4
## Min. : 4.260 Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.: 6.315 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median : 7.580 Median :8.140 Median : 7.11 Median : 7.040
## Mean : 7.501 Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.: 8.570 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :10.840 Max. :9.260 Max. :12.74 Max. :12.500
## end of summary()
# Make sure psych is installed, then attach it.
# requireNamespace() only tests availability (it does not attach anything);
# library() does the attaching and stops with an error if the package still
# cannot be loaded, instead of returning FALSE the way require() does.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
## Loading required package: psych
# describe() comes from psych (attached just above); per column it reports
# n, mean, sd, median, trimmed mean, mad, min, max, range, skew, kurtosis, se.
describe(data)
## vars n mean sd median trimmed mad min max range skew kurtosis
## x1 1 11 9.0 3.32 9.00 9.00 4.45 4.00 14.00 10.00 0.00 -1.53
## x2 2 11 9.0 3.32 9.00 9.00 4.45 4.00 14.00 10.00 0.00 -1.53
## x3 3 11 9.0 3.32 9.00 9.00 4.45 4.00 14.00 10.00 0.00 -1.53
## x4 4 11 9.0 3.32 8.00 8.00 0.00 8.00 19.00 11.00 2.47 4.52
## y1 5 11 7.5 2.03 7.58 7.49 1.82 4.26 10.84 6.58 -0.05 -1.20
## y2 6 11 7.5 2.03 8.14 7.79 1.47 3.10 9.26 6.16 -0.98 -0.51
## y3 7 11 7.5 2.03 7.11 7.15 1.53 5.39 12.74 7.35 1.38 1.24
## y4 8 11 7.5 2.03 7.04 7.20 1.90 5.25 12.50 7.25 1.12 0.63
## se
## x1 1.00
## x2 1.00
## x3 1.00
## x4 1.00
## y1 0.61
## y2 0.61
## y3 0.61
## y4 0.61
## end of describe()
# Make sure fBasics is installed, then attach it.
# requireNamespace() only tests availability (it does not attach anything);
# library() does the attaching and stops with an error if the package still
# cannot be loaded, instead of returning FALSE the way require() does.
if (!requireNamespace("fBasics", quietly = TRUE)) {
  install.packages("fBasics")
}
library(fBasics)
## Loading required package: fBasics
## Loading required package: timeDate
## Loading required package: timeSeries
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:psych':
##
## outlier
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:psych':
##
## tr
## nothing to do here...
## continue to Q3
# place the code to import graphics here
# Draw one scatter plot per Anscombe pair (x1/y1 through x4/y4), each on its
# own page. The column names are built from the pair index so the title and
# axis labels stay in step with the columns being plotted.
invisible(lapply(seq_len(4), function(k) {
  x_name <- paste0("x", k)
  y_name <- paste0("y", k)
  plot(
    data[[x_name]], data[[y_name]],
    main = paste("Scatter plots for", x_name, "and", y_name),
    xlab = x_name,
    ylab = y_name
  )
}))
# place the code to import graphics here
# Same four scatter plots, arranged on a single page as a 2 x 2 grid that is
# filled row by row (x1/y1, x2/y2 on top; x3/y3, x4/y4 below).
par(mfrow = c(2, 2))
invisible(Map(
  function(x_name, y_name) {
    plot(
      data[[x_name]], data[[y_name]],
      main = sprintf("Scatter plots for %s and %s", x_name, y_name),
      xlab = x_name,
      ylab = y_name
    )
  },
  paste0("x", 1:4),
  paste0("y", 1:4)
))
lm() function.
# place the code here
# Fit a simple linear regression of y on x for each of the four Anscombe
# pairs. The columns are named in the formula and looked up through the
# `data` argument instead of being written as data$y1 ~ data$x1: the fitted
# coefficients and ANOVA numbers are identical, but the model terms are
# labelled x1 / y1 (not data$x1 / data$y1) and predict() can be handed new
# data later on.
lm1 <- lm(y1 ~ x1, data = data)
lm2 <- lm(y2 ~ x2, data = data)
lm3 <- lm(y3 ~ x3, data = data)
lm4 <- lm(y4 ~ x4, data = data)
## end of linear model
# place the code to import graphics here
# 2 x 2 grid of the four scatter plots, each overlaid with the regression
# line of its own fitted model (lm1..lm4) drawn in red.
par(mfrow = c(2, 2))
invisible(Map(
  function(k, fit) {
    x_name <- paste0("x", k)
    y_name <- paste0("y", k)
    plot(
      data[[x_name]], data[[y_name]],
      main = paste("Scatter plots for", x_name, "and", y_name),
      xlab = x_name,
      ylab = y_name
    )
    # abline() reads intercept and slope from the fitted model.
    abline(fit, col = "Red")
  },
  1:4,
  list(lm1, lm2, lm3, lm4)
))
# place the code to import graphics here
# ANOVA table for the first fit (y1 regressed on x1).
anova(lm1)
## Analysis of Variance Table
##
## Response: data$y1
##           Df Sum Sq Mean Sq F value  Pr(>F)
## data$x1    1 27.510 27.5100   17.99 0.00217 **
## Residuals  9 13.763  1.5292
## ---
## Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# end of lm1
# ANOVA table for the second fit (y2 regressed on x2).
anova(lm2)
## Analysis of Variance Table
##
## Response: data$y2
##           Df Sum Sq Mean Sq F value   Pr(>F)
## data$x2    1 27.500 27.5000  17.966 0.002179 **
## Residuals  9 13.776  1.5307
## ---
## Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# end of lm2
# ANOVA table for the third fit (y3 regressed on x3).
anova(lm3)
## Analysis of Variance Table
##
## Response: data$y3
##           Df Sum Sq Mean Sq F value   Pr(>F)
## data$x3    1 27.470 27.4700  17.972 0.002176 **
## Residuals  9 13.756  1.5285
## ---
## Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# end of lm3
# ANOVA table for the fourth fit (y4 regressed on x4).
anova(lm4)
## Analysis of Variance Table
##
## Response: data$y4
##           Df Sum Sq Mean Sq F value   Pr(>F)
## data$x4    1 27.490 27.4900  18.003 0.002165 **
## Residuals  9 13.742  1.5269
## ---
## Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# end of lm4
It looks funny that all 4 scatter plots have very similar linear models. Thanks to data visualization, we get another angle from which to view the numbers in the data. If we don’t have multiple points of view, we may be deceived by the data and plots.