library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Show the built-in Anscombe quartet: four x columns followed by four y columns.
print(anscombe)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
# Working copy used by the rest of the script.
df <- anscombe
# First (x, y) pair only.
df1 <- select(anscombe, x1, y1)
# Column subsets: the four x columns and the four y columns.
# NOTE(review): df1, x and y are not referenced again in the visible script.
x <- anscombe[1:4]
y <- anscombe[5:8]
# Scatter plot of the first Anscombe pair (x1, y1) with its least-squares line.
# `colour` is set as a fixed value outside aes(): inside aes() the string "red"
# is treated as a one-level variable, so the points are drawn in the default
# palette colour and a legend is created that then has to be hidden.
# `formula = y ~ x` spells out the straight-line fit explicitly.
p1 <- df %>%
  ggplot(aes(x = x1, y = y1)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p1

# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave("p1.png", plot = p1)
## Saving 7 x 5 in image
# Means, variances and standard deviations of every column
df %>% summarise_all(mean)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 9 9 9 9 7.500909 7.500909 7.5 7.500909
df %>% summarise_all(var)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 11 11 11 11 4.127269 4.127629 4.12262 4.123249
df %>% summarise_all(sd)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 3.316625 3.316625 3.316625 3.316625 2.031568 2.031657 2.030424 2.030579
# Correlation between x and y within each of the four pairs
df %>% summarise(cor(x1, y1))
## cor(x1, y1)
## 1 0.8164205
df %>% summarise(cor(x2, y2))
## cor(x2, y2)
## 1 0.8162365
df %>% summarise(cor(x3, y3))
## cor(x3, y3)
## 1 0.8162867
df %>% summarise(cor(x4, y4))
## cor(x4, y4)
## 1 0.8165214
# Fitting lines
# Regress y on x for each pair, the same direction as the lines drawn by
# geom_smooth(method = "lm") in the plots. The earlier version regressed x on y
# (and wrapped the columns in as.matrix()), so its coefficients did not
# describe the plotted lines.
fit1 <- lm(y1 ~ x1, data = df)
fit2 <- lm(y2 ~ x2, data = df)
fit3 <- lm(y3 ~ x3, data = df)
fit4 <- lm(y4 ~ x4, data = df)
# Printing fitted coefficients (intercept and slope): about 3 and 0.5 for
# all four pairs
coef(fit1)
## (Intercept) x1
## 3.0000909 0.5000909
coef(fit2)
## (Intercept) x2
## 3.000909 0.500000
coef(fit3)
## (Intercept) x3
## 3.0024545 0.4997273
coef(fit4)
## (Intercept) x4
## 3.0017273 0.4999091
# Full regression summary for the first pair
summary(fit1)
##
## Call:
## lm(formula = y1 ~ x1, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.92127 -0.45577 -0.04136 0.70941 1.83882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0001 1.1247 2.667 0.02573 *
## x1 0.5001 0.1179 4.241 0.00217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
## F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
# Scatter plots with least-squares lines for the remaining three pairs.
# `colour` is set as a fixed value outside aes(): inside aes() the string "red"
# is treated as a one-level variable, so the points are drawn in the default
# palette colour and a legend is created that then has to be hidden.
# `formula = y ~ x` spells out the straight-line fit explicitly.
p2 <- df %>%
  ggplot(aes(x = x2, y = y2)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p2

p3 <- df %>%
  ggplot(aes(x = x3, y = y3)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p3

p4 <- df %>%
  ggplot(aes(x = x4, y = y4)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p4

# Draw the four panels together on one page; grid.arrange() also returns the
# arranged layout, which is kept in p5.
p5 <- grid.arrange(p1, p2, p3, p4)

# ggsave() defaults to ggplot2::last_plot(), i.e. the last ggplot displayed
# (p4), not the layout drawn by grid.arrange(). Pass p5 explicitly so the
# saved file contains all four panels.
ggsave("p5.png", plot = p5)
## Saving 7 x 5 in image
# originally published on https://marcellodelbono.it/anscombes-quartet-importance-data-visualization/