# Histogram of the fathers' heights with three vertical reference lines.
hist(Galton$Father)
abline(v = 69, col = "green", lwd = 4)           # fixed reference value
abline(v = mean(Galton$Father), col = "red")     # sample mean
abline(v = median(Galton$Father), col = "blue")  # sample median

# Lattice scatterplot of Father against Height, one panel per Gender.
library(lattice)
xyplot(Father ~ Height | Gender, data = Galton)

# Rename the six columns to their Spanish labels, then print the new names.
colnames(Galton) <- c(
  "Familia", "Padre", "Madre",
  "Genero", "Estatura", "Hijos"
)
names(Galton)
## [1] "Familia" "Padre" "Madre" "Genero" "Estatura" "Hijos"
# Preview the first ten rows of the renamed data.
head(Galton, n = 10)
## # A tibble: 10 × 6
## Familia Padre Madre Genero Estatura Hijos
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 78.5 67 M 73.2 4
## 2 1 78.5 67 F 69.2 4
## 3 1 78.5 67 F 69 4
## 4 1 78.5 67 F 69 4
## 5 2 75.5 66.5 M 73.5 4
## 6 2 75.5 66.5 M 72.5 4
## 7 2 75.5 66.5 F 65.5 4
## 8 2 75.5 66.5 F 65.5 4
## 9 3 75 64 M 71 2
## 10 3 75 64 F 68 2
# Keep only the three height columns for the scatterplot matrix.
# No group_by() here: nothing downstream aggregates by group, and a grouped
# tibble would only carry unused grouping metadata into pairs.panels().
Galton01 <- Galton %>%
  dplyr::select(Padre, Madre, Estatura)
pairs.panels(Galton01)

# Multiple linear regression of Estatura on Padre and Madre.
e <- lm(Estatura ~ Padre + Madre, data = Galton)
summary(e)
##
## Call:
## lm(formula = Estatura ~ Padre + Madre, data = Galton)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.1337 -2.6956 -0.1822 2.7731 11.6933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.31647 4.30906 5.179 2.76e-07 ***
## Padre 0.38049 0.04594 8.283 4.40e-16 ***
## Madre 0.28240 0.04920 5.740 1.30e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.388 on 887 degrees of freedom
## Multiple R-squared: 0.1097, Adjusted R-squared: 0.1077
## F-statistic: 54.64 on 2 and 887 DF, p-value: < 2.2e-16
\[ Y_e = a + b_1 x_1 + b_2 x_2 \]
# Residuals of model `e` by observation index, with a horizontal line at zero.
plot(resid(e))
abline(h = 0, col = "red", lwd = 2)

# Theoretical confidence interval for the difference in mean Estatura
# between the two Genero levels, taken in the order M, F.
inference(
  y = Estatura,
  x = Genero,
  data = Galton,
  statistic = "mean",
  type = "ci",
  method = "theoretical",
  order = c("M", "F")
)
## Response variable: numerical, Explanatory variable: categorical (2 levels)
## n_M = 460, y_bar_M = 69.2261, s_M = 2.6408
## n_F = 430, y_bar_F = 64.1144, s_F = 2.3778
## 95% CI (M - F): (4.781 , 5.4424)
# Histogram of Estatura coloured by Genero, using bins of width 1.
v <- ggplot(data = Galton, aes(x = Estatura, fill = Genero)) +
  geom_histogram(binwidth = 1, color = "black")
v + ggtitle("Histograma")

# Load the May 2023 class data from CSV.
# NOTE(review): the path is relative and looks machine-specific; it only
# resolves when the working directory matches the author's setup.
GaltonMayo23 <- read_csv(
  "Library/Mobile Documents/com~apple~CloudDocs/R/GaltonMayo23.csv"
)
## Rows: 350 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Genero
## dbl (5): Familia, Padre, Madre, Estatura, Hijos
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary of the May 2023 class data.
summary(GaltonMayo23)
## Familia Padre Madre Genero
## Min. : 1.00 Min. :1.590 Min. :1.480 Length:350
## 1st Qu.: 40.00 1st Qu.:1.720 1st Qu.:1.593 Class :character
## Median : 80.00 Median :1.770 Median :1.650 Mode :character
## Mean : 77.62 Mean :1.772 Mean :1.650
## 3rd Qu.:115.75 3rd Qu.:1.820 3rd Qu.:1.700
## Max. :151.00 Max. :1.960 Max. :1.920
## Estatura Hijos
## Min. :0.980 Min. :1.00
## 1st Qu.:1.650 1st Qu.:2.00
## Median :1.700 Median :3.00
## Mean :1.702 Mean :2.74
## 3rd Qu.:1.770 3rd Qu.:3.00
## Max. :1.960 Max. :5.00
# Keep only the three height columns for the scatterplot matrix.
# No group_by() here: nothing downstream aggregates by group, and a grouped
# tibble would only carry unused grouping metadata into pairs.panels() and lm().
Galton02 <- GaltonMayo23 %>%
  dplyr::select(Padre, Madre, Estatura)
# NOTE(review): read_csv() is already called earlier in this script, so readr
# must have been attached before this point; consider moving this call to the
# top of the script.
library(readr)
pairs.panels(Galton02)

# Multiple linear regression of Estatura on Padre and Madre (May 2023 data).
e1 <- lm(Estatura ~ Padre + Madre, data = Galton02)
summary(e1)
##
## Call:
## lm(formula = Estatura ~ Padre + Madre, data = Galton02)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65625 -0.04729 0.00901 0.07140 0.23030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.71906 0.14360 5.007 8.79e-07 ***
## Padre 0.08022 0.07279 1.102 0.271
## Madre 0.50947 0.07169 7.107 6.81e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1049 on 347 degrees of freedom
## Multiple R-squared: 0.1553, Adjusted R-squared: 0.1504
## F-statistic: 31.89 on 2 and 347 DF, p-value: 1.93e-13
\[ Y_e = a + b_1 x_1 + b_2 x_2 \]
# Residuals of model `e1` by observation index, with a horizontal line at zero.
plot(resid(e1))
abline(h = 0, col = "red", lwd = 3)

# Theoretical confidence interval for the difference in mean Estatura
# between the two Genero levels, taken in the order H, M.
inference(
  y = Estatura,
  x = Genero,
  data = GaltonMayo23,
  statistic = "mean",
  type = "ci",
  method = "theoretical",
  order = c("H", "M")
)
## Response variable: numerical, Explanatory variable: categorical (2 levels)
## n_H = 164, y_bar_H = 1.7425, s_H = 0.099
## n_M = 186, y_bar_M = 1.666, s_M = 0.1141
## 95% CI (H - M): (0.054 , 0.099)
# Histogram of Estatura coloured by Genero for the May 2023 class data.
# An explicit binwidth replaces the silent 30-bin default; 0.05 is the width
# already used for the May 2024 histogram later in this script.
v <- ggplot(data = GaltonMayo23, aes(x = Estatura, fill = Genero)) +
  geom_histogram(binwidth = 0.05, color = "black")
v + ggtitle("Histograma con datos de clase")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#### Este histograma refleja que los hijos varones, representados por el color rosa, son los más altos. Aunque se perciben mujeres muy altas y un hombre muy bajito. Probablemente, la base de datos recogió estaturas de hijos en la niñez.
# Load the August 2023 class data from CSV.
# NOTE(review): the path is relative and looks machine-specific; it only
# resolves when the working directory matches the author's setup.
GaltonAgo23 <- read_csv(
  "Library/Mobile Documents/com~apple~CloudDocs/GaltonAgo23.csv"
)
## Rows: 493 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Genero
## dbl (5): Familia, Padre, Madre, Estatura, Hijos
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary of the August 2023 class data.
summary(GaltonAgo23)
## Familia Padre Madre Genero
## Min. : 1.0 Min. :1.590 Min. :1.480 Length:493
## 1st Qu.: 59.0 1st Qu.:1.720 1st Qu.:1.600 Class :character
## Median :106.0 Median :1.780 Median :1.650 Mode :character
## Mean :108.1 Mean :1.773 Mean :1.648
## 3rd Qu.:158.0 3rd Qu.:1.820 3rd Qu.:1.700
## Max. :220.0 Max. :1.960 Max. :1.920
## Estatura Hijos
## Min. :0.98 Min. :1.000
## 1st Qu.:1.60 1st Qu.:2.000
## Median :1.70 Median :3.000
## Mean :1.68 Mean :2.653
## 3rd Qu.:1.75 3rd Qu.:3.000
## Max. :1.96 Max. :5.000
# Keep only the three height columns for the scatterplot matrix.
# No group_by() here: nothing downstream aggregates by group, and a grouped
# tibble would only carry unused grouping metadata into pairs.panels() and lm().
Galton05 <- GaltonAgo23 %>%
  dplyr::select(Padre, Madre, Estatura)
pairs.panels(Galton05)

# Multiple linear regression of Estatura on Padre and Madre (August 2023 data).
e5 <- lm(Estatura ~ Padre + Madre, data = Galton05)
summary(e5)
##
## Call:
## lm(formula = Estatura ~ Padre + Madre, data = Galton05)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69079 -0.05751 0.01284 0.07085 0.26016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.87873 0.14574 6.030 3.24e-09 ***
## Padre -0.01496 0.07388 -0.202 0.84
## Madre 0.50248 0.07526 6.676 6.66e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1173 on 490 degrees of freedom
## Multiple R-squared: 0.09212, Adjusted R-squared: 0.08841
## F-statistic: 24.86 on 2 and 490 DF, p-value: 5.212e-11
\[ Y_e = a + b_1 x_1 + b_2 x_2 \] #### Una regresión múltiple señala que por cada centímetro de estatura que aumente la madre, la del hijo aumentará en 0.50 cm, mientras que por cada centímetro que aumente la estatura del padre la del hijo disminuirá en 0.01 cm en promedio. Es decir, el padre no determina la estatura de los hijos. R² de 0.09.
# Residuals of model `e5` by observation index, with a horizontal line at zero.
plot(resid(e5))
abline(h = 0, col = "red", lwd = 2)

# Histogram of Estatura coloured by Genero for the August 2023 class data.
# An explicit binwidth replaces the silent 30-bin default; 0.05 is the width
# already used for the May 2024 histogram later in this script.
j <- ggplot(data = GaltonAgo23, aes(x = Estatura, fill = Genero)) +
  geom_histogram(binwidth = 0.05, color = "black")
j + ggtitle("Histograma con datos de clase: agosto 2023")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Se juntaron 643 observaciones.
| Conc. | Prom | min | max | sd | rango | kurto |
|---|---|---|---|---|---|---|
| Familia | 131.59 | 1 | 266 | 74.31 | 265 | -1.07 |
| Padre | 1.74 | 1.58 | 1.99 | 0.08 | 0.41 | 0.25 |
| Madre | 1.63 | 1.48 | 1.85 | 0.06 | 0.37 | 0.56 |
| Genero | 1.51 | 1 | 2 | 0.5 | 1 | -2.0 |
| Estatura | 1.67 | 0.5 | 1.98 | 0.13 | 1.48 | 15.14 |
| Hijos | 2.78 | 1 | 7 | 1.08 | 6 | 1.89 |
library(readxl)
# Read every column as text first. Some rows store their numbers as text cells
# with trailing whitespace (e.g. '1.70 '); forcing col_types to "numeric" makes
# read_excel() replace those cells with NA (with a warning), silently dropping
# valid measurements from every later summary and model.
# NOTE(review): the path is relative and looks machine-specific.
GALTON_MAY24 <- read_excel(
  "Library/Mobile Documents/com~apple~CloudDocs/R/GALTON_MAY24.xlsx",
  col_types = "text"
)
# Columns 1-3 and 5-6 are the ones previously declared numeric (column 4 stays
# text). Strip any horizontal/vertical whitespace, then convert to numeric.
numeric_cols <- c(1, 2, 3, 5, 6)
GALTON_MAY24[numeric_cols] <- lapply(
  GALTON_MAY24[numeric_cols],
  function(column) as.numeric(trimws(column, whitespace = "[\\h\\v]"))
)
## Warning: Expecting numeric in B80 / R80C2: got '1.70 '
## Warning: Expecting numeric in C80 / R80C3: got '1.60 '
## Warning: Expecting numeric in E80 / R80C5: got '1.67 '
## Warning: Expecting numeric in F80 / R80C6: got '2 '
## Warning: Expecting numeric in B81 / R81C2: got '1.70 '
## Warning: Expecting numeric in C81 / R81C3: got '1.60 '
## Warning: Expecting numeric in E81 / R81C5: got '1.63 '
## Warning: Expecting numeric in F81 / R81C6: got '2 '
## Warning: Expecting numeric in B82 / R82C2: got '1.77 '
## Warning: Expecting numeric in C82 / R82C3: got '1.54 '
## Warning: Expecting numeric in E82 / R82C5: got '1.63 '
## Warning: Expecting numeric in F82 / R82C6: got '1 '
## Warning: Expecting numeric in B83 / R83C2: got '1.76 '
## Warning: Expecting numeric in C83 / R83C3: got '1.61 '
## Warning: Expecting numeric in E83 / R83C5: got '1.54 '
## Warning: Expecting numeric in F83 / R83C6: got '3 '
## Warning: Expecting numeric in B84 / R84C2: got '1.76 '
## Warning: Expecting numeric in C84 / R84C3: got '1.61 '
## Warning: Expecting numeric in E84 / R84C5: got '1.62 '
## Warning: Expecting numeric in F84 / R84C6: got '3 '
## Warning: Expecting numeric in B85 / R85C2: got '1.76 '
## Warning: Expecting numeric in C85 / R85C3: got '1.61 '
## Warning: Expecting numeric in E85 / R85C5: got '1.57 '
## Warning: Expecting numeric in F85 / R85C6: got '3 '
## Warning: Expecting numeric in B86 / R86C2: got '1.82 '
## Warning: Expecting numeric in C86 / R86C3: got '1.65 '
## Warning: Expecting numeric in E86 / R86C5: got '1.76 '
## Warning: Expecting numeric in F86 / R86C6: got '2 '
## Warning: Expecting numeric in B87 / R87C2: got '1.82 '
## Warning: Expecting numeric in C87 / R87C3: got '1.65 '
## Warning: Expecting numeric in E87 / R87C5: got '1.70 '
## Warning: Expecting numeric in F87 / R87C6: got '2 '
## Warning: Expecting numeric in B88 / R88C2: got '1.77 '
## Warning: Expecting numeric in C88 / R88C3: got '1.62 '
## Warning: Expecting numeric in E88 / R88C5: got '1.65 '
## Warning: Expecting numeric in F88 / R88C6: got '3 '
## Warning: Expecting numeric in B89 / R89C2: got '1.77 '
## Warning: Expecting numeric in C89 / R89C3: got '1.62 '
## Warning: Expecting numeric in E89 / R89C5: got '1.70 '
## Warning: Expecting numeric in F89 / R89C6: got '3 '
## Warning: Expecting numeric in B90 / R90C2: got '1.77 '
## Warning: Expecting numeric in C90 / R90C3: got '1.62 '
## Warning: Expecting numeric in E90 / R90C5: got '1.60 '
## Warning: Expecting numeric in F90 / R90C6: got '3 '
## Warning: Expecting numeric in B91 / R91C2: got '1.84 '
## Warning: Expecting numeric in C91 / R91C3: got '1.67 '
## Warning: Expecting numeric in E91 / R91C5: got '1.76 '
## Warning: Expecting numeric in F91 / R91C6: got '1 '
## Warning: Expecting numeric in B92 / R92C2: got '1.79 '
## Warning: Expecting numeric in C92 / R92C3: got '1.57 '
## Warning: Expecting numeric in E92 / R92C5: got '1.62 '
## Warning: Expecting numeric in F92 / R92C6: got '2 '
## Warning: Expecting numeric in B93 / R93C2: got '1.79 '
## Warning: Expecting numeric in C93 / R93C3: got '1.57 '
## Warning: Expecting numeric in E93 / R93C5: got '1.64 '
## Warning: Expecting numeric in F93 / R93C6: got '2 '
## Warning: Expecting numeric in B94 / R94C2: got '1.90 '
## Warning: Expecting numeric in C94 / R94C3: got '1.66 '
## Warning: Expecting numeric in E94 / R94C5: got '1.85 '
## Warning: Expecting numeric in F94 / R94C6: got '2 '
## Warning: Expecting numeric in B95 / R95C2: got '1.90 '
## Warning: Expecting numeric in C95 / R95C3: got '1.66 '
## Warning: Expecting numeric in E95 / R95C5: got '1.63 '
## Warning: Expecting numeric in F95 / R95C6: got '2 '
## Warning: Expecting numeric in B96 / R96C2: got '1.76 '
## Warning: Expecting numeric in C96 / R96C3: got '1.63 '
## Warning: Expecting numeric in E96 / R96C5: got '1.78 '
## Warning: Expecting numeric in F96 / R96C6: got '3 '
## Warning: Expecting numeric in B97 / R97C2: got '1.76 '
## Warning: Expecting numeric in C97 / R97C3: got '1.63 '
## Warning: Expecting numeric in E97 / R97C5: got '1.83 '
## Warning: Expecting numeric in F97 / R97C6: got '3 '
## Warning: Expecting numeric in B98 / R98C2: got '1.76 '
## Warning: Expecting numeric in C98 / R98C3: got '1.68 '
## Warning: Expecting numeric in E98 / R98C5: got '1.64 '
## Warning: Expecting numeric in F98 / R98C6: got '3 '
## Warning: Expecting numeric in B99 / R99C2: got '1.86 '
## Warning: Expecting numeric in C99 / R99C3: got '1.68 '
## Warning: Expecting numeric in E99 / R99C5: got '1.65 '
## Warning: Expecting numeric in F99 / R99C6: got '2 '
## Warning: Expecting numeric in B100 / R100C2: got '1.86 '
## Warning: Expecting numeric in C100 / R100C3: got '1.68 '
## Warning: Expecting numeric in E100 / R100C5: got '1.65 '
## Warning: Expecting numeric in F100 / R100C6: got '2 '
# Work on a shorter-named copy of the May 2024 data and summarise its columns.
GALTON24 <- GALTON_MAY24
summary(GALTON24)
## FaMilia Padre Madre Genero
## Min. : 1.00 Min. :1.58 Min. :1.48 Length:1298
## 1st Qu.: 49.25 1st Qu.:1.70 1st Qu.:1.60 Class :character
## Median :113.00 Median :1.75 Median :1.64 Mode :character
## Mean :113.74 Mean :1.76 Mean :1.64
## 3rd Qu.:166.00 3rd Qu.:1.80 3rd Qu.:1.68
## Max. :266.00 Max. :1.99 Max. :1.92
## NA's :52 NA's :52
## Estatura Hijos
## Min. :0.500 Min. :1.000
## 1st Qu.:1.610 1st Qu.:2.000
## Median :1.690 Median :3.000
## Mean :1.679 Mean :2.723
## 3rd Qu.:1.750 3rd Qu.:3.000
## Max. :1.980 Max. :7.000
## NA's :21 NA's :21
# Lattice scatterplot of Padre against Estatura, one panel per Genero.
library(lattice)
xyplot(Padre ~ Estatura | Genero, data = GALTON24)

# Jittered scatter of Estatura against Hijos with a fitted linear trend.
# NOTE(review): the dashed line at y = 0 stretches the y-axis down to zero;
# confirm it is intended for a plot of heights.
ggplot(data = GALTON24, aes(x = Hijos, y = Estatura)) +
  geom_jitter() +
  geom_smooth(method = "lm") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(x = "Numero de hijos", y = "Estatura")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
En esta gráfica podemos percibir a cinco niños de un metro y uno de medio metro entre los padres que tienen dos hijos
# Keep only the three height columns for the scatterplot matrix.
# No group_by() here: nothing downstream aggregates by group, and a grouped
# tibble would only carry unused grouping metadata into pairs.panels().
GALTTON_01 <- GALTON24 %>%
  dplyr::select(Padre, Madre, Estatura)
pairs.panels(GALTTON_01)

# Multiple linear regression of Estatura on Padre and Madre (May 2024 data).
h <- lm(Estatura ~ Padre + Madre, data = GALTON24)
summary(h)
##
## Call:
## lm(formula = Estatura ~ Padre + Madre, data = GALTON24)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22426 -0.05879 0.01097 0.07452 0.27970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.86461 0.09369 9.228 < 2e-16 ***
## Padre 0.10583 0.04639 2.281 0.0227 *
## Madre 0.38290 0.05039 7.599 5.87e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1213 on 1243 degrees of freedom
## (52 observations deleted due to missingness)
## Multiple R-squared: 0.06441, Adjusted R-squared: 0.06291
## F-statistic: 42.79 on 2 and 1243 DF, p-value: < 2.2e-16
\[ Y_e = a + b_1 x_1 + b_2 x_2 \]
# Residuals of model `h` by observation index, with a horizontal line at zero.
plot(resid(h))
abline(h = 0, col = "red", lwd = 5)

# Theoretical confidence interval for the difference in mean Estatura
# between the two Genero levels, taken in the order M, F.
# NOTE(review): this call uses `Galton` with levels M/F, while the surrounding
# statements work on GALTON24 - confirm which dataset was intended.
inference(
  y = Estatura,
  x = Genero,
  data = Galton,
  statistic = "mean",
  type = "ci",
  method = "theoretical",
  order = c("M", "F")
)
## Response variable: numerical, Explanatory variable: categorical (2 levels)
## n_M = 460, y_bar_M = 69.2261, s_M = 2.6408
## n_F = 430, y_bar_F = 64.1144, s_F = 2.3778
## 95% CI (M - F): (4.781 , 5.4424)
# Histogram of Estatura coloured by Genero for the May 2024 data (bin width 0.05).
z <- ggplot(data = GALTON24, aes(x = Estatura, fill = Genero)) +
  geom_histogram(binwidth = 0.05, color = "black")
z + ggtitle("Histograma Mayo 2024")
## Warning: Removed 21 rows containing non-finite values (`stat_bin()`).
| Correlaciones | Pearson |
|---|---|
| Datos Originales de Francis Galton | |
| Padre vs Madre | 0.07 |
| Padre vs Estatura | 0.28 |
| Madre vs Estatura | 0.20 |
| Datos: Mayo de 2023 | |
| Padre vs Madre | 0.33 |
| Padre vs Estatura | 0.18 |
| Madre vs Estatura | 0.39 |
| Datos: Agosto 2023 | |
| Padre vs Madre | 0.35 |
| Padre vs Estatura | 0.10 |
| Madre vs Estatura | 0.30 |
| Datos:Mayo de 2024 | |
| Padre vs Madre | 0.26 |
| Padre vs Estatura | 0.15 |
| Madre vs Estatura | 0.14 |