The objective of this problem set is to orient you to a number of activities in R and to conduct a thoughtful exercise in appreciating the importance of data visualization. For each question, create a code chunk or text response that completes/answers the activity or question requested. Finally, upon completion, name your final output .html file as: YourName_ANLY512-Section-Year-Semester.html, upload it to the Rpubs site, and submit the link to the hosted file via Moodle.
anscombe data that is part of the library(datasets) in R, and assign that data to a new object called data.
data = anscombe
head(data)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
fBasics() package!)
require("fBasics")
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.4.2
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.4.3
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.4.2
# Summarise every column of `data` with its mean and variance.
# colStats() (from fBasics) applies FUN to each column; FUN returns the
# two statistics in the order (mean, variance), which become the two rows.
basic.stats <- colStats(
  data,
  FUN = function(column) {
    c(mean(column), var(column))
  }
)

# Label the rows to match the order of the values returned by FUN.
rownames(basic.stats) <- c("Mean", "Variance")
print(basic.stats)
## x1 x2 x3 x4 y1 y2 y3 y4
## Mean 9 9 9 9 7.500909 7.500909 7.50000 7.500909
## Variance 11 11 11 11 4.127269 4.127629 4.12262 4.123249
# Correlation between x_i and y_i for each of the four quartet pairs.
#
# The result is stored under `pair_cor` rather than `cor` so that the
# stats::cor() function is not masked by a matrix of the same name in the
# workspace. The four pairs are computed in one pass over the pair index
# instead of four copy-pasted calls.
pair_index <- seq_len(4)

pair_cor <- vapply(
  pair_index,
  function(i) stats::cor(data[[paste0("x", i)]], data[[paste0("y", i)]]),
  numeric(1)
)

# Present the values as a one-column table: one labelled row per pair.
pair_cor <- matrix(
  pair_cor,
  ncol = 1,
  dimnames = list(
    sprintf("(x%d, y%d)", pair_index, pair_index),
    "Correlation"
  )
)
pair_cor
## Correlation
## (x1, y1) 0.8164205
## (x2, y2) 0.8162365
## (x3, y3) 0.8162867
## (x4, y4) 0.8165214
# Draw one default scatterplot per quartet pair, in order 1 to 4.
# The axis labels are given explicitly as "data$x1", "data$y1", ... so each
# plot carries the same labels as a direct plot(data$x1, data$y1) call.
for (i in seq_len(4)) {
  x_name <- paste0("x", i)
  y_name <- paste0("y", i)
  plot(
    data[[x_name]], data[[y_name]],
    xlab = paste0("data$", x_name),
    ylab = paste0("data$", y_name)
  )
}
# Draw the four quartet scatterplots on a single 2 x 2 panel.
#
# par() returns the previous settings, which are restored once the panel is
# complete so the 2 x 2 layout does not leak into later plots.
old_par <- par(mfrow = c(2, 2))

for (i in seq_len(4)) {
  plot(
    data[[paste0("x", i)]], data[[paste0("y", i)]],
    pch = 16, xlab = "x", ylab = "y", main = as.character(i)
  )
}

par(old_par)
lm() function.
m1 = lm(y1~x1, data = data)
# Fit a simple linear regression of y_i on x_i for the remaining pairs.
# (m1, the fit for the first pair, is created on the line just above.)
# Each call is written out literally so the "Call:" line printed by
# summary() shows the formula for that pair.
m2 <- lm(y2 ~ x2, data = data)
m3 <- lm(y3 ~ x3, data = data)
m4 <- lm(y4 ~ x4, data = data)

# Redraw the 2 x 2 scatterplot panel, adding each pair's fitted line.
# The previous par() settings are saved and restored afterwards so the
# panel layout does not leak into later plots.
fits <- list(m1, m2, m3, m4)
old_par <- par(mfrow = c(2, 2))

for (i in seq_along(fits)) {
  plot(
    data[[paste0("x", i)]], data[[paste0("y", i)]],
    pch = 16, xlab = "x", ylab = "y", main = as.character(i)
  )
  abline(fits[[i]])
}

par(old_par)
summary(m1)$r.squared
## [1] 0.6665425
summary(m2)$r.squared
## [1] 0.666242
summary(m3)$r.squared
## [1] 0.666324
summary(m4)$r.squared
## [1] 0.6667073
summary(m1)
##
## Call:
## lm(formula = y1 ~ x1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.92127 -0.45577 -0.04136 0.70941 1.83882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0001 1.1247 2.667 0.02573 *
## x1 0.5001 0.1179 4.241 0.00217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
## F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
summary(m2)
##
## Call:
## lm(formula = y2 ~ x2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9009 -0.7609 0.1291 0.9491 1.2691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.001 1.125 2.667 0.02576 *
## x2 0.500 0.118 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6662, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002179
summary(m3)
##
## Call:
## lm(formula = y3 ~ x3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1586 -0.6146 -0.2303 0.1540 3.2411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0025 1.1245 2.670 0.02562 *
## x3 0.4997 0.1179 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared: 0.6663, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002176
summary(m4)
##
## Call:
## lm(formula = y4 ~ x4, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.751 -0.831 0.000 0.809 1.839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0017 1.1239 2.671 0.02559 *
## x4 0.4999 0.1178 4.243 0.00216 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared: 0.6667, Adjusted R-squared: 0.6297
## F-statistic: 18 on 1 and 9 DF, p-value: 0.002165