library(tidyr)
library(car)
## Loading required package: carData
library (agricolae)
library(readr)
# NO2 concentration readings for the three locations A, B and C, one column
# per location. The locations have 8, 10 and 12 readings respectively, so the
# shorter columns are padded with NA up to 12 rows.
df <- data.frame(
  A = c(19.8, 21.1, 28.2, 22.3, 22.5, 28.9, 23.8, 16.9, rep(NA, 4)),
  B = c(31.6, 32.8, 41.1, 36.8, 37, 35.6, 32.2, 43.9, 37.5, 25.2, rep(NA, 2)),
  C = c(37.2, 30.2, 26.6, 31.7, 26.8, 28.6, 29.2, 22.9, 38, 33.9, 26.2, 40.5)
)
print(df)
## A B C
## 1 19.8 31.6 37.2
## 2 21.1 32.8 30.2
## 3 28.2 41.1 26.6
## 4 22.3 36.8 31.7
## 5 22.5 37.0 26.8
## 6 28.9 35.6 28.6
## 7 23.8 32.2 29.2
## 8 16.9 43.9 22.9
## 9 NA 37.5 38.0
## 10 NA 25.2 33.9
## 11 NA NA 26.2
## 12 NA NA 40.5
# Side-by-side boxplots, one box per column (location) of the wide table.
boxplot(
  df,
  main = "Contaminación por dioxido de nitrógeno (NO2)",
  xlab = "ubicación",
  ylab = "concentración de NO2"
)

# Reshape to long format: one row per reading, with the location name in
# `ubicacion` and the measured value in `NO2`. The NA padding of the wide
# table is still present at this point.
df_long <- pivot_longer(
  df,
  cols = c(A, B, C),
  names_to = "ubicacion",
  values_to = "NO2"
)
print(df_long)
## # A tibble: 36 × 2
## ubicacion NO2
## <chr> <dbl>
## 1 A 19.8
## 2 B 31.6
## 3 C 37.2
## 4 A 21.1
## 5 B 32.8
## 6 C 30.2
## 7 A 28.2
## 8 B 41.1
## 9 C 26.6
## 10 A 22.3
## # ℹ 26 more rows
# Drop the rows that only hold the NA padding of the shorter locations.
df_long <- na.omit(df_long)
# One-way ANOVA of NO2 concentration by location.
no2_aov <- aov(NO2 ~ ubicacion, data= df_long)
# ANOVA table (degrees of freedom, sums of squares, F statistic, p-value).
summary(no2_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## ubicacion 2 696.3 348.2 13.72 7.75e-05 ***
## Residuals 27 685.3 25.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Dado el p-valor (7.75e-05 < 0.05), rechazamos la hipótesis nula y aceptamos que existen diferencias estadísticamente significativas entre las concentraciones medias de NO2 de al menos dos de los tres lugares.
# Diagnostic plot for the NO2 model; `which = 1` selects the
# residuals-versus-fitted-values panel.
plot(no2_aov, which = 1)
\[H_0 : \sigma^2_{1} = \dots = \sigma^2_{k}\]
\[H_1 : \text{Al menos una varianza es distinta}\]
# Levene's test needs a factor for the grouping variable.
df_long[["ubicacion"]] <- as.factor(df_long[["ubicacion"]])
# Keep the residuals of the fitted ANOVA next to the data they belong to.
df_long[["residuals"]] <- residuals(no2_aov)
# Levene's test of equal residual variance across locations.
leveneTest(residuals ~ ubicacion, data = df_long)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.4384 0.6496
## 27
Dado que el p-valor = 0.6496 > 0.05, no se rechaza la hipótesis de homocedasticidad.
# Bartlett's test of homogeneity of variances on the same residuals, grouped
# by location, as a second check alongside Levene's test.
bartlett.test(residuals ~ ubicacion, data = df_long)
##
## Bartlett test of homogeneity of variances
##
## data: residuals by ubicacion
## Bartlett's K-squared = 0.7074, df = 2, p-value = 0.7021
En ambos tests, no hay evidencia suficiente para rechazar la homocedasticidad.
# Shapiro-Wilk normality test on the ANOVA residuals.
shapiro.test(df_long$residuals)
##
## Shapiro-Wilk normality test
##
## data: df_long$residuals
## W = 0.97995, p-value = 0.8243
Dado que el p-valor = 0.8243 > 0.05, no se rechaza la normalidad de los residuos.
d)
# Pairwise t tests between locations (pooled SD) with Bonferroni-adjusted
# p-values.
pairwise.t.test(df_long$NO2, df_long$ubicacion, p.adjust.method="bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: df_long$NO2 and df_long$ubicacion
##
## A B
## B 5.3e-05 -
## C 0.0049 0.1558
##
## P value adjustment method: bonferroni
# Show the long table, which at this point also carries the residuals column.
df_long
## # A tibble: 30 × 3
## ubicacion NO2 residuals
## <fct> <dbl> <dbl>
## 1 A 19.8 -3.14
## 2 B 31.6 -3.77
## 3 C 37.2 6.22
## 4 A 21.1 -1.84
## 5 B 32.8 -2.57
## 6 C 30.2 -0.783
## 7 A 28.2 5.26
## 8 B 41.1 5.73
## 9 C 26.6 -4.38
## 10 A 22.3 -0.638
## # ℹ 20 more rows
# Same one-way model, this time fitted through lm() and then converted with
# aov(); printing it shows the sums of squares and degrees of freedom.
modelo <- lm(NO2 ~ ubicacion, data = df_long)
# NOTE(review): this object is named `anova`, which shadows stats::anova in
# the workspace. Calls such as anova(modelo) still find the function, but the
# name is confusing; it is kept here only so later references keep working.
anova <- aov(modelo)
print(anova)
## Call:
## aov(formula = modelo)
##
## Terms:
## ubicacion Residuals
## Sum of Squares 696.3036 685.3164
## Deg. of Freedom 2 27
##
## Residual standard error: 5.038064
## Estimated effects may be unbalanced
# Fisher's LSD comparison of the location means from the NO2 model;
# `console = TRUE` prints the report and `group = TRUE` adds letter groups.
prueba_lsd <- LSD.test(
  no2_aov,
  trt = "ubicacion",
  group = TRUE,
  console = TRUE
)
##
## Study: no2_aov ~ "ubicacion"
##
## LSD t Test for NO2
##
## Mean Square Error: 25.38209
##
## ubicacion, means and individual ( 95 %) CI
##
## NO2 std r se LCL UCL Min Max Q25 Q50 Q75
## A 22.93750 4.040133 8 1.781225 19.28273 26.59227 16.9 28.9 20.775 22.4 24.900
## B 35.37000 5.261823 10 1.593176 32.10107 38.63893 25.2 43.9 32.350 36.2 37.375
## C 30.98333 5.409391 12 1.454364 27.99923 33.96744 22.9 40.5 26.750 29.7 34.725
##
## Alpha: 0.05 ; DF Error: 27
## Critical Value of t: 2.051831
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Treatments with the same letter are not significantly different.
##
## NO2 groups
## B 35.37000 a
## C 30.98333 a
## A 22.93750 b
# Print the full object returned by LSD.test (statistics, parameters, means
# and letter groups).
prueba_lsd
## $statistics
## MSerror Df Mean CV
## 25.38209 27 30.3 16.62727
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD none ubicacion 3 0.05
##
## $means
## NO2 std r se LCL UCL Min Max Q25 Q50 Q75
## A 22.93750 4.040133 8 1.781225 19.28273 26.59227 16.9 28.9 20.775 22.4 24.900
## B 35.37000 5.261823 10 1.593176 32.10107 38.63893 25.2 43.9 32.350 36.2 37.375
## C 30.98333 5.409391 12 1.454364 27.99923 33.96744 22.9 40.5 26.750 29.7 34.725
##
## $comparison
## NULL
##
## $groups
## NO2 groups
## B 35.37000 a
## C 30.98333 a
## A 22.93750 b
##
## attr(,"class")
## [1] "group"
# Tukey HSD pairwise comparisons between locations at a 95% family-wise
# confidence level, then print the differences, intervals and adjusted
# p-values.
prueba_tukey <- TukeyHSD(no2_aov, conf.level = 0.95)
prueba_tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = NO2 ~ ubicacion, data = df_long)
##
## $ubicacion
## diff lwr upr p adj
## B-A 12.432500 6.507278 18.3577222 0.0000513
## C-A 8.045833 2.344286 13.7473810 0.0045194
## C-B -4.386667 -9.735193 0.9618592 0.1234729
# Duncan's multiple range test on the location means of the NO2 model, with
# letter grouping at alpha = 0.05. The stray trailing comma of the earlier
# call (which passed an empty extra argument) has been removed.
prueba_duncan <- duncan.test(no2_aov, "ubicacion", alpha = 0.05, group = TRUE)
prueba_duncan
## $statistics
## MSerror Df Mean CV
## 25.38209 27 30.3 16.62727
##
## $parameters
## test name.t ntr alpha
## Duncan ubicacion 3 0.05
##
## $duncan
## NULL
##
## $means
## NO2 std r se Min Max Q25 Q50 Q75
## A 22.93750 4.040133 8 1.781225 16.9 28.9 20.775 22.4 24.900
## B 35.37000 5.261823 10 1.593176 25.2 43.9 32.350 36.2 37.375
## C 30.98333 5.409391 12 1.454364 22.9 40.5 26.750 29.7 34.725
##
## $comparison
## NULL
##
## $groups
## NO2 groups
## B 35.37000 a
## C 30.98333 a
## A 22.93750 b
##
## attr(,"class")
## [1] "group"
Existe diferencia significativa entre B y A, y entre C y A; entre B y C no se detecta una diferencia significativa.
Ejercicio 2.
# Load the wheat data set (columns semilla, parcela, abono, cosecha).
# NOTE(review): absolute, machine-specific path; the script will not run on
# another computer unless trigo.csv is placed at this exact location.
trigo <- read_csv("/Users/usermac/Documents/especializacion/metodos estadisticos/trigo.csv")
## Rows: 96 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): abono
## dbl (3): semilla, parcela, cosecha
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the wheat data.
head(trigo)
## # A tibble: 6 × 4
## semilla parcela abono cosecha
## <dbl> <dbl> <chr> <dbl>
## 1 1 1 A 4.73
## 2 2 2 A 4.76
## 3 1 3 A 4.17
## 4 2 4 A 5.23
## 5 1 1 A 4.39
## 6 2 2 A 5.15
# Yield distribution for each fertiliser type.
boxplot(
  cosecha ~ abono,
  data = trigo,
  main = "Cosecha por tipo de abono",
  xlab = "Abono",
  ylab = "Cosecha"
)

# One-way ANOVA of yield by fertiliser, followed by its ANOVA table.
cosecha_aov <- aov(cosecha ~ abono, data = trigo)
summary(cosecha_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## abono 2 0.910 0.4549 4.758 0.0108 *
## Residuals 93 8.891 0.0956
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals-versus-fitted diagnostic for the wheat model. This must use
# cosecha_aov: plotting no2_aov here would redraw the diagnostic of the
# unrelated NO2 model from the first exercise.
plot(cosecha_aov, which = 1)
# Fisher's LSD comparison of the fertiliser means; prints the report and the
# letter groups.
prueba_lsd <- LSD.test(y = cosecha_aov, "abono", group = TRUE, console = TRUE)
##
## Study: cosecha_aov ~ "abono"
##
## LSD t Test for cosecha
##
## Mean Square Error: 0.09560225
##
## abono, means and individual ( 95 %) CI
##
## cosecha std r se LCL UCL Min Max Q25 Q50
## A 4.741250 0.3182589 32 0.05465867 4.632709 4.849791 4.17 5.23 4.5325 4.730
## B 4.835938 0.3071498 32 0.05465867 4.727396 4.944479 4.24 5.58 4.6975 4.895
## C 4.978125 0.3019553 32 0.05465867 4.869584 5.086666 4.47 5.67 4.7550 5.005
## Q75
## A 5.0725
## B 5.0250
## C 5.2150
##
## Alpha: 0.05 ; DF Error: 93
## Critical Value of t: 1.985802
##
## least Significant Difference: 0.1535006
##
## Treatments with the same letter are not significantly different.
##
## cosecha groups
## C 4.978125 a
## B 4.835938 ab
## A 4.741250 b
# Print the full LSD result object for the wheat model.
prueba_lsd
## $statistics
## MSerror Df Mean CV t.value LSD
## 0.09560225 93 4.851771 6.372851 1.985802 0.1535006
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD none abono 3 0.05
##
## $means
## cosecha std r se LCL UCL Min Max Q25 Q50
## A 4.741250 0.3182589 32 0.05465867 4.632709 4.849791 4.17 5.23 4.5325 4.730
## B 4.835938 0.3071498 32 0.05465867 4.727396 4.944479 4.24 5.58 4.6975 4.895
## C 4.978125 0.3019553 32 0.05465867 4.869584 5.086666 4.47 5.67 4.7550 5.005
## Q75
## A 5.0725
## B 5.0250
## C 5.2150
##
## $comparison
## NULL
##
## $groups
## cosecha groups
## C 4.978125 a
## B 4.835938 ab
## A 4.741250 b
##
## attr(,"class")
## [1] "group"
Esto quiere decir que existe una diferencia significativa de medias entre C y A; B no difiere significativamente ni de A ni de C.
# Tukey HSD pairwise comparisons between fertilisers at a 95% family-wise
# confidence level, then print the result.
prueba_tukey <- TukeyHSD(cosecha_aov, conf.level = 0.95)
prueba_tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cosecha ~ abono, data = trigo)
##
## $abono
## diff lwr upr p adj
## B-A 0.0946875 -0.08942436 0.2787994 0.4415513
## C-A 0.2368750 0.05276314 0.4209869 0.0079604
## C-B 0.1421875 -0.04192436 0.3262994 0.1625203
El test de Tukey confirma que la única diferencia significativa es la que existe entre C y A, ya que en las demás comparaciones el intervalo de confianza contiene el cero y, además, su p-valor es mayor que 0.05.
Para concluir, como el test LSD da para C la mayor media (4.98), este es el abono de mayor producción, al menos respecto a A; sin embargo, la diferencia respecto a B no resulta significativa.
# Load the wine-tasting scores (one column per wine: vinoA, vinoB, vinoC).
# NOTE(review): absolute, machine-specific path; not portable to another
# computer.
vino <- read_csv("/Users/usermac/Documents/especializacion/metodos estadisticos/cata-vinos.csv")
## Rows: 15 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): vinoA, vinoB, vinoC
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the wine scores.
head(vino)
## # A tibble: 6 × 3
## vinoA vinoB vinoC
## <dbl> <dbl> <dbl>
## 1 7.8 8 7.8
## 2 7.9 7.6 7.6
## 3 8.6 6.9 8
## 4 8 7.7 8
## 5 8.1 7.4 8
## 6 8.7 7.2 7.9
# Reshape the wine scores to long format: the wine name goes to `tipo_vino`
# and the score to `puntuacion`.
vino_long <- pivot_longer(
  vino,
  cols = c(vinoA, vinoB, vinoC),
  names_to = "tipo_vino",
  values_to = "puntuacion"
)
head(vino_long)
## # A tibble: 6 × 2
## tipo_vino puntuacion
## <chr> <dbl>
## 1 vinoA 7.8
## 2 vinoB 8
## 3 vinoC 7.8
## 4 vinoA 7.9
## 5 vinoB 7.6
## 6 vinoC 7.6
# Score distribution for each wine. No labels are given, so the axis titles
# come from the expressions in the formula.
boxplot(vino_long$puntuacion ~ vino_long$tipo_vino)
# One-way ANOVA of score by wine, followed by its ANOVA table.
vino_aov<-aov(puntuacion ~tipo_vino, data=vino_long)
summary(vino_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## tipo_vino 2 3.048 1.5242 14.91 1.28e-05 ***
## Residuals 42 4.295 0.1023
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fisher's LSD comparison of the mean score of each wine; prints the report
# and the letter groups.
prueba_lsd <- LSD.test(
  vino_aov,
  trt = "tipo_vino",
  group = TRUE,
  console = TRUE
)
##
## Study: vino_aov ~ "tipo_vino"
##
## LSD t Test for puntuacion
##
## Mean Square Error: 0.102254
##
## tipo_vino, means and individual ( 95 %) CI
##
## puntuacion std r se LCL UCL Min Max Q25 Q50 Q75
## vinoA 8.060000 0.3397478 15 0.08256471 7.893378 8.226622 7.5 8.7 7.8 8.0 8.20
## vinoB 7.426667 0.3217512 15 0.08256471 7.260044 7.593289 6.9 8.0 7.2 7.4 7.65
## vinoC 7.806667 0.2963267 15 0.08256471 7.640044 7.973289 7.3 8.5 7.6 7.8 8.00
##
## Alpha: 0.05 ; DF Error: 42
## Critical Value of t: 2.018082
##
## least Significant Difference: 0.2356396
##
## Treatments with the same letter are not significantly different.
##
## puntuacion groups
## vinoA 8.060000 a
## vinoC 7.806667 b
## vinoB 7.426667 c
# Print the full LSD result object for the wine model.
prueba_lsd
## $statistics
## MSerror Df Mean CV t.value LSD
## 0.102254 42 7.764444 4.118411 2.018082 0.2356396
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD none tipo_vino 3 0.05
##
## $means
## puntuacion std r se LCL UCL Min Max Q25 Q50 Q75
## vinoA 8.060000 0.3397478 15 0.08256471 7.893378 8.226622 7.5 8.7 7.8 8.0 8.20
## vinoB 7.426667 0.3217512 15 0.08256471 7.260044 7.593289 6.9 8.0 7.2 7.4 7.65
## vinoC 7.806667 0.2963267 15 0.08256471 7.640044 7.973289 7.3 8.5 7.6 7.8 8.00
##
## $comparison
## NULL
##
## $groups
## puntuacion groups
## vinoA 8.060000 a
## vinoC 7.806667 b
## vinoB 7.426667 c
##
## attr(,"class")
## [1] "group"
Ejercicio 4.
# Grades (`notas`) for five schools, eight values per school, one column per
# school.
int <- data.frame(
  instituto_1 = c(5.5, 5.2, 5.9, 7.1, 6.2, 5.9, 5.3, 6.2),
  instituto_2 = c(6.1, 7.2, 5.5, 6.7, 7.6, 5.9, 8.1, 8.3),
  instituto_3 = c(4.9, 5.5, 6.1, 6.1, 6.2, 6.4, 6.9, 4.5),
  instituto_4 = c(3.2, 3.3, 5.5, 5.7, 6.0, 6.1, 4.7, 5.1),
  instituto_5 = c(6.7, 5.8, 5.4, 5.5, 4.9, 6.2, 6.1, 7.0)
)
# Show the wide table.
int
## instituto_1 instituto_2 instituto_3 instituto_4 instituto_5
## 1 5.5 6.1 4.9 3.2 6.7
## 2 5.2 7.2 5.5 3.3 5.8
## 3 5.9 5.5 6.1 5.5 5.4
## 4 7.1 6.7 6.1 5.7 5.5
## 5 6.2 7.6 6.2 6.0 4.9
## 6 5.9 5.9 6.4 6.1 6.2
## 7 5.3 8.1 6.9 4.7 6.1
## 8 6.2 8.3 4.5 5.1 7.0
# Reshape the grades to long format: the school name goes to `institute` and
# the grade to `notas`.
int_long <- pivot_longer(
  int,
  cols = c(instituto_1, instituto_2, instituto_3, instituto_4, instituto_5),
  names_to = "institute",
  values_to = "notas"
)
print(int_long)
## # A tibble: 40 × 2
## institute notas
## <chr> <dbl>
## 1 instituto_1 5.5
## 2 instituto_2 6.1
## 3 instituto_3 4.9
## 4 instituto_4 3.2
## 5 instituto_5 6.7
## 6 instituto_1 5.2
## 7 instituto_2 7.2
## 8 instituto_3 5.5
## 9 instituto_4 3.3
## 10 instituto_5 5.8
## # ℹ 30 more rows
# Grade distribution for each school.
boxplot(int_long$notas ~ int_long$institute)
# One-way ANOVA of grade by school.
int_aov <- aov(notas ~ institute, data = int_long)
# Printing the aov object only shows sums of squares and degrees of freedom;
# summary() is needed to see the F statistic and p-value that justify the
# post-hoc comparisons, as is done for the other exercises.
summary(int_aov)
int_aov
## Call:
## aov(formula = notas ~ institute, data = int_long)
##
## Terms:
## institute Residuals
## Sum of Squares 15.68500 27.27875
## Deg. of Freedom 4 35
##
## Residual standard error: 0.8828323
## Estimated effects may be unbalanced
# Fisher's LSD comparison of the mean grade of each school; prints the report
# and the letter groups.
prueba_lsd <- LSD.test(
  int_aov,
  trt = "institute",
  group = TRUE,
  console = TRUE
)
##
## Study: int_aov ~ "institute"
##
## LSD t Test for notas
##
## Mean Square Error: 0.7793929
##
## institute, means and individual ( 95 %) CI
##
## notas std r se LCL UCL Min Max Q25 Q50
## instituto_1 5.9125 0.6128097 8 0.3121284 5.278846 6.546154 5.2 7.1 5.450 5.90
## instituto_2 6.9250 1.0430039 8 0.3121284 6.291346 7.558654 5.5 8.3 6.050 6.95
## instituto_3 5.8250 0.8013382 8 0.3121284 5.191346 6.458654 4.5 6.9 5.350 6.10
## instituto_4 4.9500 1.1439281 8 0.3121284 4.316346 5.583654 3.2 6.1 4.350 5.30
## instituto_5 5.9500 0.6948792 8 0.3121284 5.316346 6.583654 4.9 7.0 5.475 5.95
## Q75
## instituto_1 6.200
## instituto_2 7.725
## instituto_3 6.250
## instituto_4 5.775
## instituto_5 6.325
##
## Alpha: 0.05 ; DF Error: 35
## Critical Value of t: 2.030108
##
## least Significant Difference: 0.8961224
##
## Treatments with the same letter are not significantly different.
##
## notas groups
## instituto_2 6.9250 a
## instituto_5 5.9500 b
## instituto_1 5.9125 b
## instituto_3 5.8250 bc
## instituto_4 4.9500 c
# Print the full LSD result object for the schools model.
prueba_lsd
## $statistics
## MSerror Df Mean CV t.value LSD
## 0.7793929 35 5.9125 14.93162 2.030108 0.8961224
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD none institute 5 0.05
##
## $means
## notas std r se LCL UCL Min Max Q25 Q50
## instituto_1 5.9125 0.6128097 8 0.3121284 5.278846 6.546154 5.2 7.1 5.450 5.90
## instituto_2 6.9250 1.0430039 8 0.3121284 6.291346 7.558654 5.5 8.3 6.050 6.95
## instituto_3 5.8250 0.8013382 8 0.3121284 5.191346 6.458654 4.5 6.9 5.350 6.10
## instituto_4 4.9500 1.1439281 8 0.3121284 4.316346 5.583654 3.2 6.1 4.350 5.30
## instituto_5 5.9500 0.6948792 8 0.3121284 5.316346 6.583654 4.9 7.0 5.475 5.95
## Q75
## instituto_1 6.200
## instituto_2 7.725
## instituto_3 6.250
## instituto_4 5.775
## instituto_5 6.325
##
## $comparison
## NULL
##
## $groups
## notas groups
## instituto_2 6.9250 a
## instituto_5 5.9500 b
## instituto_1 5.9125 b
## instituto_3 5.8250 bc
## instituto_4 4.9500 c
##
## attr(,"class")
## [1] "group"
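Acá se observan las diferencias: el instituto_2 difiere significativamente de todos los demás; el instituto_4 difiere de los institutos 1, 2 y 5, pero no del instituto_3; y los institutos 1, 3 y 5 no difieren significativamente entre sí.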