library(datasets)
# Use the anscombe data that is part of library(datasets) in R and assign
# that data to a new object called data.
data <- anscombe
# Per-column summary statistics; the recorded output follows.
summary(data)
## x1 x2 x3 x4 y1
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8 Min. : 4.260
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8 1st Qu.: 6.315
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8 Median : 7.580
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9 Mean : 7.501
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8 3rd Qu.: 8.570
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19 Max. :10.840
## y2 y3 y4
## Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median :8.140 Median : 7.11 Median : 7.040
## Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :9.260 Max. :12.74 Max. :12.500
library(fBasics)
# Mean and variance of every column, one column at a time. The recorded
# console output is kept directly under each call.
mean(data[["x1"]])
## [1] 9
var(data[["x1"]])
## [1] 11
mean(data[["x2"]])
## [1] 9
var(data[["x2"]])
## [1] 11
mean(data[["x3"]])
## [1] 9
var(data[["x3"]])
## [1] 11
mean(data[["x4"]])
## [1] 9
var(data[["x4"]])
## [1] 11
mean(data[["y1"]])
## [1] 7.500909
var(data[["y1"]])
## [1] 4.127269
mean(data[["y2"]])
## [1] 7.500909
var(data[["y2"]])
## [1] 4.127629
mean(data[["y3"]])
## [1] 7.5
var(data[["y3"]])
## [1] 4.12262
mean(data[["y4"]])
## [1] 7.500909
var(data[["y4"]])
## [1] 4.123249
# Pearson correlation test for the first pair (x1, y1).
with(data, correlationTest(x1, y1))
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8164
## STATISTIC:
## t: 4.2415
## P VALUE:
## Alternative Two-Sided: 0.00217
## Alternative Less: 0.9989
## Alternative Greater: 0.001085
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4244, 0.9507
## Less: -1, 0.9388
## Greater: 0.5113, 1
##
## Description:
## Mon Nov 23 21:15:08 2020
# Pearson correlation test for the second pair (x2, y2).
with(data, correlationTest(x2, y2))
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8162
## STATISTIC:
## t: 4.2386
## P VALUE:
## Alternative Two-Sided: 0.002179
## Alternative Less: 0.9989
## Alternative Greater: 0.001089
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4239, 0.9506
## Less: -1, 0.9387
## Greater: 0.5109, 1
##
## Description:
## Mon Nov 23 21:15:08 2020
# Pearson correlation test for the third pair (x3, y3).
with(data, correlationTest(x3, y3))
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8163
## STATISTIC:
## t: 4.2394
## P VALUE:
## Alternative Two-Sided: 0.002176
## Alternative Less: 0.9989
## Alternative Greater: 0.001088
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4241, 0.9507
## Less: -1, 0.9387
## Greater: 0.511, 1
##
## Description:
## Mon Nov 23 21:15:08 2020
# Pearson correlation test for the fourth pair (x4, y4).
with(data, correlationTest(x4, y4))
##
## Title:
## Pearson's Correlation Test
##
## Test Results:
## PARAMETER:
## Degrees of Freedom: 9
## SAMPLE ESTIMATES:
## Correlation: 0.8165
## STATISTIC:
## t: 4.243
## P VALUE:
## Alternative Two-Sided: 0.002165
## Alternative Less: 0.9989
## Alternative Greater: 0.001082
## CONFIDENCE INTERVAL:
## Two-Sided: 0.4246, 0.9507
## Less: -1, 0.9388
## Greater: 0.5115, 1
##
## Description:
## Mon Nov 23 21:15:08 2020
library("ggplot2")
# One scatterplot per x/y pair, each printed as its own figure with a
# horizontally centred title.
ggplot(data, aes(x = x1, y = y1)) +
  geom_point() +
  labs(
    title = "(Scatterplot)Relationship between x1 and y1",
    x = "x1",
    y = "y1"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(data, aes(x = x2, y = y2)) +
  geom_point() +
  labs(
    title = "(Scatterplot)Relationship between x2 and y2",
    x = "x2",
    y = "y2"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(data, aes(x = x3, y = y3)) +
  geom_point() +
  labs(
    title = "(Scatterplot)Relationship between x3 and y3",
    x = "x3",
    y = "y3"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(data, aes(x = x4, y = y4)) +
  geom_point() +
  labs(
    title = "(Scatterplot)Relationship between x4 and y4",
    x = "x4",
    y = "y4"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
library("ggplot2")
library(gridExtra)
# Build the four scatterplots as objects, then lay them out on a 2-row grid.
p1 <- ggplot(data, aes(x = x1, y = y1)) +
  geom_point() +
  labs(title = "Relationship between x1 and y1", x = "x1", y = "y1") +
  theme(plot.title = element_text(hjust = 0.5))

p2 <- ggplot(data, aes(x = x2, y = y2)) +
  geom_point() +
  labs(title = "Relationship between x2 and y2", x = "x2", y = "y2") +
  theme(plot.title = element_text(hjust = 0.5))

p3 <- ggplot(data, aes(x = x3, y = y3)) +
  geom_point() +
  labs(title = "Relationship between x3 and y3", x = "x3", y = "y3") +
  theme(plot.title = element_text(hjust = 0.5))

p4 <- ggplot(data, aes(x = x4, y = y4)) +
  geom_point() +
  labs(title = "Relationship between x4 and y4", x = "x4", y = "y4") +
  theme(plot.title = element_text(hjust = 0.5))

grid.arrange(p1, p2, p3, p4, nrow = 2)
# Fit a linear model for the first pair with the lm() function.
# The formula x1 ~ y1 makes x1 the response and y1 the predictor; the recorded
# output below matches this direction.
# NOTE(review): the geom_smooth() lines in the plots fit y ~ x, i.e. the
# opposite direction -- confirm which one is intended.
fit1 <- lm(x1 ~ y1, data = data)
summary(fit1)
##
## Call:
## lm(formula = x1 ~ y1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6522 -1.5117 -0.2657 1.2341 3.8946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9975 2.4344 -0.410 0.69156
## y1 1.3328 0.3142 4.241 0.00217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.019 on 9 degrees of freedom
## Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
## F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
# Second pair: linear model with x2 as response and y2 as predictor.
fit2 <- lm(x2 ~ y2, data = data)
summary(fit2)
##
## Call:
## lm(formula = x2 ~ y2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8516 -1.4315 -0.3440 0.8467 4.2017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9948 2.4354 -0.408 0.69246
## y2 1.3325 0.3144 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.02 on 9 degrees of freedom
## Multiple R-squared: 0.6662, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002179
# Third pair: linear model with x3 as response and y3 as predictor.
fit3 <- lm(x3 ~ y3, data = data)
summary(fit3)
##
## Call:
## lm(formula = x3 ~ y3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9869 -1.3733 -0.0266 1.3200 3.2133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0003 2.4362 -0.411 0.69097
## y3 1.3334 0.3145 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.019 on 9 degrees of freedom
## Multiple R-squared: 0.6663, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002176
# Fourth pair: linear model with x4 as response and y4 as predictor.
fit4 <- lm(x4 ~ y4, data = data)
summary(fit4)
##
## Call:
## lm(formula = x4 ~ y4, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7859 -1.4122 -0.1853 1.4551 3.3329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0036 2.4349 -0.412 0.68985
## y4 1.3337 0.3143 4.243 0.00216 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.018 on 9 degrees of freedom
## Multiple R-squared: 0.6667, Adjusted R-squared: 0.6297
## F-statistic: 18 on 1 and 9 DF, p-value: 0.002165
library("ggplot2")
library(gridExtra)
# Same four scatterplots, each with a black linear-model smoother layered on
# top of the points, arranged on a 2-row grid.
p1 <- ggplot(data, aes(x = x1, y = y1)) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  labs(
    title = "(Scatterplot)Relationship between x1 and y1",
    x = "x1",
    y = "y1"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

p2 <- ggplot(data, aes(x = x2, y = y2)) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  labs(
    title = "(Scatterplot)Relationship between x2 and y2",
    x = "x2",
    y = "y2"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

p3 <- ggplot(data, aes(x = x3, y = y3)) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  labs(
    title = "(Scatterplot)Relationship between x3 and y3",
    x = "x3",
    y = "y3"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

p4 <- ggplot(data, aes(x = x4, y = y4)) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  labs(
    title = "(Scatterplot)Relationship between x4 and y4",
    x = "x4",
    y = "y4"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

grid.arrange(p1, p2, p3, p4, nrow = 2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# ANOVA table for the first fit. The recorded output is kept as `##` comments
# so the script still parses.
a1 <- anova(fit1)
a1
## Analysis of Variance Table
##
## Response: x1
##           Df Sum Sq Mean Sq F value  Pr(>F)
## y1         1  73.32  73.320   17.99 0.00217 **
## Residuals  9  36.68   4.076
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plot the ANOVA table object, then the fitted model itself.
plot(a1)
plot(fit1)
# ANOVA table for the second fit. The recorded output is kept as `##` comments
# so the script still parses.
a2 <- anova(fit2)
a2
## Analysis of Variance Table
##
## Response: x2
##           Df Sum Sq Mean Sq F value   Pr(>F)
## y2         1 73.287  73.287  17.966 0.002179 **
## Residuals  9 36.713   4.079
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plot the ANOVA table object, then the fitted model itself.
plot(a2)
plot(fit2)
# ANOVA table for the third fit. The recorded output is kept as `##` comments
# so the script still parses.
a3 <- anova(fit3)
a3
## Analysis of Variance Table
##
## Response: x3
##           Df Sum Sq Mean Sq F value   Pr(>F)
## y3         1 73.296  73.296  17.972 0.002176 **
## Residuals  9 36.704   4.078
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plot the ANOVA table object, then the fitted model itself.
plot(a3)
plot(fit3)
# ANOVA table for the fourth fit. The recorded output is kept as `##` comments
# so the script still parses.
a4 <- anova(fit4)
a4
## Analysis of Variance Table
##
## Response: x4
##           Df Sum Sq Mean Sq F value   Pr(>F)
## y4         1 73.338  73.338  18.003 0.002165 **
## Residuals  9 36.662   4.074
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plot the ANOVA table object, then the fitted model itself.
plot(a4)
plot(fit4)
# Anscombe's Quartet is a masterpiece dataset from the renowned statistician
# Francis Anscombe. It consists of four datasets that have nearly identical
# descriptive statistics but very different distributions, which points out
# the importance of data visualization. In this problem set, we can see the
# importance and essence of data visualization for ourselves from Anscombe's
# Quartet. For a clearer understanding of the data, see
# https://en.wikipedia.org/wiki/Anscombe's_quartet. From the scatterplots
# above and the model fits alongside their plots, we can clearly see that the
# data points of each dataset are scattered differently from one another.