library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Show the built-in Anscombe data: four x columns and four y columns,
# eleven rows each (see the printed table below).
anscombe
##    x1 x2 x3 x4    y1   y2    y3    y4
## 1  10 10 10  8  8.04 9.14  7.46  6.58
## 2   8  8  8  8  6.95 8.14  6.77  5.76
## 3  13 13 13  8  7.58 8.74 12.74  7.71
## 4   9  9  9  8  8.81 8.77  7.11  8.84
## 5  11 11 11  8  8.33 9.26  7.81  8.47
## 6  14 14 14  8  9.96 8.10  8.84  7.04
## 7   6  6  6  8  7.24 6.13  6.08  5.25
## 8   4  4  4 19  4.26 3.10  5.39 12.50
## 9  12 12 12  8 10.84 9.13  8.15  5.56
## 10  7  7  7  8  4.82 7.26  6.42  7.91
## 11  5  5  5  8  5.68 4.74  5.73  6.89
# Working copy that the plots, summaries and model fits below read from.
df <- anscombe

# Only the first pair (x1, y1).
df1 <- select(anscombe, x1, y1)

# Columns 1-4 (x1..x4) and columns 5-8 (y1..y4) as separate data frames.
x <- anscombe[, 1:4]
y <- anscombe[, 5:8]


# Scatter plot of the first pair (x1, y1) with a fitted straight line.
# The point colour is a fixed setting, so it is passed outside aes(): inside
# aes() the string "red" is treated as a data value, which draws the points in
# the default palette colour and creates a legend that then has to be hidden.
p1 <- ggplot(df, aes(x = x1, y = y1)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p1

# Name the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave("p1.png", plot = p1)
## Saving 7 x 5 in image
# Means, variances and standard deviations of every column.
# across(everything(), fn) replaces the superseded summarise_all(); the
# resulting one-row data frames are the same.
summarise(df, across(everything(), mean))
##   x1 x2 x3 x4       y1       y2  y3       y4
## 1  9  9  9  9 7.500909 7.500909 7.5 7.500909
summarise(df, across(everything(), var))
##   x1 x2 x3 x4       y1       y2      y3       y4
## 1 11 11 11 11 4.127269 4.127629 4.12262 4.123249
summarise(df, across(everything(), sd))
##         x1       x2       x3       x4       y1       y2       y3       y4
## 1 3.316625 3.316625 3.316625 3.316625 2.031568 2.031657 2.030424 2.030579
# Correlation between x and y within each of the four pairs
summarise(df, cor(x1, y1))
##   cor(x1, y1)
## 1   0.8164205
summarise(df, cor(x2, y2))
##   cor(x2, y2)
## 1   0.8162365
summarise(df, cor(x3, y3))
##   cor(x3, y3)
## 1   0.8162867
summarise(df, cor(x4, y4))
##   cor(x4, y4)
## 1   0.8165214
# Fitting lines
# Regress y on x for each pair, the same direction as the lines drawn by
# geom_smooth(method = "lm") in the plots (which model y ~ x). The columns are
# looked up in `df` directly, so no as.matrix() wrapping is needed.
fit1 <- lm(y1 ~ x1, data = df)
fit2 <- lm(y2 ~ x2, data = df)
fit3 <- lm(y3 ~ x3, data = df)
fit4 <- lm(y4 ~ x4, data = df)

# Printing fitted coefficients (intercept and slope) of each model
coef(fit1)
## (Intercept)          x1 
##   3.0000909   0.5000909
coef(fit2)
## (Intercept)          x2 
##    3.000909    0.500000
coef(fit3)
## (Intercept)          x3 
##   3.0024545   0.4997273
coef(fit4)
## (Intercept)          x4 
##   3.0017273   0.4999091
# Full regression summary for the first pair
summary(fit1)
## 
## Call:
## lm(formula = y1 ~ x1, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.92127 -0.45577 -0.04136  0.70941  1.83882 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.0001     1.1247   2.667  0.02573 * 
## x1            0.5001     0.1179   4.241  0.00217 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared:  0.6665, Adjusted R-squared:  0.6295 
## F-statistic: 17.99 on 1 and 9 DF,  p-value: 0.00217
# Scatter plots with a fitted straight line for pairs 2, 3 and 4.
# The point colour is a fixed setting, so it is passed outside aes(): inside
# aes() the string "red" is treated as a data value, which draws the points in
# the default palette colour and creates a legend that then has to be hidden.
p2 <- ggplot(df, aes(x = x2, y = y2)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p2

p3 <- ggplot(df, aes(x = x3, y = y3)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p3

p4 <- ggplot(df, aes(x = x4, y = y4)) +
  geom_point(colour = "red", size = 3) +
  geom_smooth(method = "lm", formula = y ~ x)
p4

# Draw the four panels together in one grid and keep the arranged layout.
p5 <- grid.arrange(p1, p2, p3, p4)

# ggsave() saves last_plot() by default, which is the most recent ggplot
# (p4 here), not the grid.arrange() layout. Pass the arranged object
# explicitly so the file contains all four panels.
ggsave("p5.png", plot = p5)
## Saving 7 x 5 in image
# originally published on https://marcellodelbono.it/anscombes-quartet-importance-data-visualization/