# Read Galton.csv (path relative to the working directory) into `Galton`.
Galton <- read_csv(file = "Galton.csv")
## Rows: 898 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): FamilyID, Gender
## dbl (4): FatherHeight, MotherHeight, Height, NumKids
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows (head()'s default) to check the import.
head(Galton, n = 6L)
## # A tibble: 6 × 6
## FamilyID FatherHeight MotherHeight Gender Height NumKids
## <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 78.5 67 M 73.2 4
## 2 1 78.5 67 F 69.2 4
## 3 1 78.5 67 F 69 4
## 4 1 78.5 67 F 69 4
## 5 2 75.5 66.5 M 73.5 4
## 6 2 75.5 66.5 M 72.5 4
# Pairwise plot matrix of the three height variables. Columns are selected by
# name (they sit at positions 2, 3 and 5 in the tibble printed above) so the
# call keeps selecting the same variables if the column order ever changes.
ggpairs(Galton, columns = c("FatherHeight", "MotherHeight", "Height"))

# Simple linear regression of child height on father height, followed by the
# standard summary (coefficient table, residual standard error, R-squared, F).
modelo.fit <- lm(formula = Height ~ FatherHeight, data = Galton)
summary(object = modelo.fit)
##
## Call:
## lm(formula = Height ~ FatherHeight, data = Galton)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2683 -2.6689 -0.2092 2.6342 11.9329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.11039 3.22706 12.120 <2e-16 ***
## FatherHeight 0.39938 0.04658 8.574 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.446 on 896 degrees of freedom
## Multiple R-squared: 0.07582, Adjusted R-squared: 0.07479
## F-statistic: 73.51 on 1 and 896 DF, p-value: < 2.2e-16
# Estimated intercept and slope of the fitted line, as a named numeric vector.
coef(object = modelo.fit)
## (Intercept) FatherHeight
## 39.1103868 0.3993813
# First ten residuals of the fit. They are obtained through the residuals()
# extractor, in line with the coef() accessor used for the coefficients,
# rather than by reaching into the model object's components with `$`.
head(residuals(modelo.fit), n = 10)
## 1 2 3 4 5 6 7 8
## 2.738184 -1.261816 -1.461816 -1.461816 4.236328 3.236328 -3.763672 -3.763672
## 9 10
## 1.936018 -1.063982
# Scatter plot of child height (y) against father height (x), red points.
ggplot(data = Galton, mapping = aes(x = FatherHeight, y = Height)) +
  geom_point(colour = "red") +
  theme_minimal()

# Same scatter plot with the least-squares line (y ~ x) overlaid in blue;
# se = FALSE suppresses the confidence band around the line.
ggplot(data = Galton, mapping = aes(x = FatherHeight, y = Height)) +
  geom_point(colour = "red") +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    colour = "blue"
  ) +
  theme_light()
