The objective of this problem set is to orient you to a number of activities in R and to conduct a thoughtful exercise in appreciating the importance of data visualization. For each question, create a code chunk or text response that completes/answers the activity or question requested. Finally, upon completion, post your assignment on RPubs and upload a link to it to the “Problem Set 2” assignment on Moodle.
Load the anscombe data that is part of the library(datasets) in R, and assign that data to a new object called data.
library(datasets)
# Bind the anscombe data frame to the working name used by every later step.
dataanscombe <- anscombe
(Hint: use the fBasics() package!)
library(fBasics)
## Loading required package: timeDate
## Loading required package: timeSeries
##
## Rmetrics Package fBasics
## Analysing Markets and calculating Basic Statistics
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
# Mean of every column (x1..x4, y1..y4).
colMeans(dataanscombe)
## x1 x2 x3 x4 y1 y2 y3 y4
## 9.000000 9.000000 9.000000 9.000000 7.500909 7.500909 7.500000 7.500909
# Variance of every column. colVars() is not base R; presumably it is provided
# by fBasics or one of the packages it loads -- confirm if the library changes.
colVars(dataanscombe)
## x1 x2 x3 x4 y1 y2 y3
## 11.000000 11.000000 11.000000 11.000000 4.127269 4.127629 4.122620
## y4
## 4.123249
# Pearson correlation test for the first pair: column 1 (x1) vs column 5 (y1).
pearsonTest(dataanscombe[,1],dataanscombe[,5])
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8164
## STATISTIC:
## t: 4.2415
## P VALUE:
## Alternative Two-Sided: 0.00217
## Alternative Less: 0.9989
## Alternative Greater: 0.001085
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4244, 0.9507
## Less: -1, 0.9388
## Greater: 0.5113, 1
##
## Description:
## Mon Jul 31 22:46:10 2017
# Pearson correlation test for the second pair: column 2 (x2) vs column 6 (y2).
pearsonTest(dataanscombe[,2],dataanscombe[,6])
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8162
## STATISTIC:
## t: 4.2386
## P VALUE:
## Alternative Two-Sided: 0.002179
## Alternative Less: 0.9989
## Alternative Greater: 0.001089
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4239, 0.9506
## Less: -1, 0.9387
## Greater: 0.5109, 1
##
## Description:
## Mon Jul 31 22:46:10 2017
# Pearson correlation test for the third pair: column 3 (x3) vs column 7 (y3).
pearsonTest(dataanscombe[,3],dataanscombe[,7])
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8163
## STATISTIC:
## t: 4.2394
## P VALUE:
## Alternative Two-Sided: 0.002176
## Alternative Less: 0.9989
## Alternative Greater: 0.001088
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4241, 0.9507
## Less: -1, 0.9387
## Greater: 0.511, 1
##
## Description:
## Mon Jul 31 22:46:10 2017
# Pearson correlation test for the fourth pair: column 4 (x4) vs column 8 (y4).
pearsonTest(dataanscombe[,4],dataanscombe[,8])
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8165
## STATISTIC:
## t: 4.243
## P VALUE:
## Alternative Two-Sided: 0.002165
## Alternative Less: 0.9989
## Alternative Greater: 0.001082
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4246, 0.9507
## Less: -1, 0.9388
## Greater: 0.5115, 1
##
## Description:
## Mon Jul 31 22:46:10 2017
# Pearson correlation coefficient of each matched x/y pair, with the columns
# looked up by name rather than by position.
x1y1 <- cor(dataanscombe[["x1"]], dataanscombe[["y1"]])
x2y2 <- cor(dataanscombe[["x2"]], dataanscombe[["y2"]])
x3y3 <- cor(dataanscombe[["x3"]], dataanscombe[["y3"]])
x4y4 <- cor(dataanscombe[["x4"]], dataanscombe[["y4"]])

# Stack the four coefficients into a one-column matrix; rbind() labels each
# row with the name of the variable it came from.
correl1 <- rbind(x1y1, x2y2, x3y3, x4y4)
print(correl1)
## [,1]
## x1y1 0.8164205
## x2y2 0.8162365
## x3y3 0.8162867
## x4y4 0.8165214
# Scatter plot of each x/y pair with base plot(), one call per pair.
plot(dataanscombe$x1,dataanscombe$y1)
plot(dataanscombe$x2,dataanscombe$y2)
plot(dataanscombe$x3,dataanscombe$y3)
plot(dataanscombe$x4,dataanscombe$y4)
library(ggplot2)
library(gridExtra)
# One ggplot scatter per x/y pair: solid blue points, a centred title, a border
# around the panel, and axis breaks every 2 units (x: 0-20, y: 0-12).
plot1 <- ggplot(dataanscombe) +
  geom_point(aes(x = x1, y = y1), color = "blue", size = 2) +
  labs(title = "plot1") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2))

plot2 <- ggplot(dataanscombe) +
  geom_point(aes(x = x2, y = y2), color = "blue", size = 2) +
  labs(title = "plot2") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2))

plot3 <- ggplot(dataanscombe) +
  geom_point(aes(x = x3, y = y3), color = "blue", size = 2) +
  labs(title = "plot3") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2))

plot4 <- ggplot(dataanscombe) +
  geom_point(aes(x = x4, y = y4), color = "blue", size = 2) +
  labs(title = "plot4") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2))

# Draw the four panels together in a single figure.
grid.arrange(plot1, plot2, plot3, plot4)
Fit a linear model to each x, y pair using the lm() function.
lmx1y1 <- lm(dataanscombe$y1 ~ dataanscombe$x1)
# Simple linear regressions of y on x for pairs 2-4; lmx1y1 for the first pair
# is created on the preceding line.
lmx2y2 <- lm(dataanscombe$y2 ~ dataanscombe$x2)
lmx3y3 <- lm(dataanscombe$y3 ~ dataanscombe$x3)
lmx4y4 <- lm(dataanscombe$y4 ~ dataanscombe$x4)
# Residual quantiles, coefficient table and R-squared for the x1/y1 fit.
summary(lmx1y1)
##
## Call:
## lm(formula = dataanscombe$y1 ~ dataanscombe$x1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.92127 -0.45577 -0.04136 0.70941 1.83882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0001 1.1247 2.667 0.02573 *
## dataanscombe$x1 0.5001 0.1179 4.241 0.00217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
## F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
# Same regression summary for the x2/y2 fit.
summary(lmx2y2)
##
## Call:
## lm(formula = dataanscombe$y2 ~ dataanscombe$x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9009 -0.7609 0.1291 0.9491 1.2691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.001 1.125 2.667 0.02576 *
## dataanscombe$x2 0.500 0.118 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6662, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002179
# Same regression summary for the x3/y3 fit.
summary(lmx3y3)
##
## Call:
## lm(formula = dataanscombe$y3 ~ dataanscombe$x3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1586 -0.6146 -0.2303 0.1540 3.2411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0025 1.1245 2.670 0.02562 *
## dataanscombe$x3 0.4997 0.1179 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared: 0.6663, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002176
# Same regression summary for the x4/y4 fit.
summary(lmx4y4)
##
## Call:
## lm(formula = dataanscombe$y4 ~ dataanscombe$x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.751 -0.831 0.000 0.809 1.839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0017 1.1239 2.671 0.02559 *
## dataanscombe$x4 0.4999 0.1178 4.243 0.00216 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared: 0.6667, Adjusted R-squared: 0.6297
## F-statistic: 18 on 1 and 9 DF, p-value: 0.002165
# Rebuild the four scatter panels with red points and overlay the fitted
# least-squares line. The plot-level aes() gives geom_smooth() the x/y mapping
# it needs; se = FALSE leaves out the confidence band.
plot1 <- ggplot(dataanscombe) +
  geom_point(aes(x = x1, y = y1), color = "red", size = 2) +
  labs(title = "plot1") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2)) +
  aes(x = x1, y = y1) +
  geom_smooth(method = "lm", se = FALSE)

plot2 <- ggplot(dataanscombe) +
  geom_point(aes(x = x2, y = y2), color = "red", size = 2) +
  labs(title = "plot2") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2)) +
  aes(x = x2, y = y2) +
  geom_smooth(method = "lm", se = FALSE)

plot3 <- ggplot(dataanscombe) +
  geom_point(aes(x = x3, y = y3), color = "red", size = 2) +
  labs(title = "plot3") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2)) +
  aes(x = x3, y = y3) +
  geom_smooth(method = "lm", se = FALSE)

plot4 <- ggplot(dataanscombe) +
  geom_point(aes(x = x4, y = y4), color = "red", size = 2) +
  labs(title = "plot4") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA)
  ) +
  scale_x_continuous(breaks = seq(0, 20, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 2)) +
  aes(x = x4, y = y4) +
  geom_smooth(method = "lm", se = FALSE)

# Draw the four panels together in a single figure.
grid.arrange(plot1, plot2, plot3, plot4)
library(fit.models)
# Bundle the four fitted lm objects so that a single summary() call reports
# their residuals, coefficients and R-squared values together.
modelfit <- fit.models(lmx1y1,lmx2y2,lmx3y3,lmx4y4)
summary(modelfit)
Calls:
lmx1y1: lm(formula = dataanscombe$y1 ~ dataanscombe$x1)
lmx2y2: lm(formula = dataanscombe$y2 ~ dataanscombe$x2)
lmx3y3: lm(formula = dataanscombe$y3 ~ dataanscombe$x3)
lmx4y4: lm(formula = dataanscombe$y4 ~ dataanscombe$x4)

Residual Statistics:
           Min      1Q     Median     3Q   Max
lmx1y1: -1.921 -0.4558 -4.136e-02 0.7094 1.839
lmx2y2: -1.901 -0.7609  1.291e-01 0.9491 1.269
lmx3y3: -1.159 -0.6146 -2.303e-01 0.1540 3.241
lmx4y4: -1.751 -0.8310  1.110e-16 0.8090 1.839

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)
    (Intercept): lmx1y1:   3.0001     1.1247   2.667  0.02573 *
                 lmx2y2:   3.0009     1.1253   2.667  0.02576 *
                 lmx3y3:   3.0025     1.1245   2.670  0.02562 *
                 lmx4y4:   3.0017     1.1239   2.671  0.02559 *

dataanscombe$x1: lmx1y1:   0.5001     0.1179   4.241  0.00217 **
                 lmx2y2:
                 lmx3y3:
                 lmx4y4:

dataanscombe$x2: lmx1y1:
                 lmx2y2:   0.5000     0.1180   4.239  0.00218 **
                 lmx3y3:
                 lmx4y4:

dataanscombe$x3: lmx1y1:
                 lmx2y2:
                 lmx3y3:   0.4997     0.1179   4.239  0.00218 **
                 lmx4y4:

dataanscombe$x4: lmx1y1:
                 lmx2y2:
                 lmx3y3:
                 lmx4y4:   0.4999     0.1178   4.243  0.00216 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual Scale Estimates:
lmx1y1: 1.237 on 9 degrees of freedom
lmx2y2: 1.237 on 9 degrees of freedom
lmx3y3: 1.236 on 9 degrees of freedom
lmx4y4: 1.236 on 9 degrees of freedom

Multiple R-squared:
lmx1y1: 0.6665
lmx2y2: 0.6662
lmx3y3: 0.6663
lmx4y4: 0.6667
Dataset 1: a set of points following a roughly linear relationship. Dataset 2: not a linear relationship, but more like a curve or polynomial relationship. Dataset 3: a stronger linear relationship, with one outlier. Dataset 4: the same value of x for every point except one outlier.