library(carData)
## Warning: package 'carData' was built under R version 4.2.3
# Load the Salaries data set shipped with carData. The name must match the
# data set exactly; a misspelled name only raises a "data set not found"
# warning and loads nothing.
data("Salaries", package = "carData")
# attach() is retained only so that statements referring to columns by bare
# name keep working; prefer explicit `Salaries$column` references.
attach(Salaries)
# Keep the rows whose sex is one of the listed levels. The factor levels are
# "Female" and "Male" (capitalised), and `%in%` tests set membership row by
# row. Comparing with `==` against a length-2 vector recycles that vector
# positionally, and lower-case labels never match any level, so no rows were
# selected.
salary1 <- Salaries[Salaries$sex %in% c("Male", "Female"), ]
str(salary1)
# NOTE(review): the output previously printed here (0 obs.) came from the
# faulty comparison; re-knit the document to regenerate it.
# Preview the first six rows of the data set.
head(x = Salaries, n = 6L)
## rank discipline yrs.since.phd yrs.service sex salary
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
# Per-column summary: level counts for factors, quartiles and mean for
# numeric columns.
summary(object = Salaries)
## rank discipline yrs.since.phd yrs.service sex
## AsstProf : 67 A:181 Min. : 1.00 Min. : 0.00 Female: 39
## AssocProf: 64 B:216 1st Qu.:12.00 1st Qu.: 7.00 Male :358
## Prof :266 Median :21.00 Median :16.00
## Mean :22.31 Mean :17.61
## 3rd Qu.:32.00 3rd Qu.:27.00
## Max. :56.00 Max. :60.00
## salary
## Min. : 57800
## 1st Qu.: 91000
## Median :107300
## Mean :113706
## 3rd Qu.:134185
## Max. :231545
# Keep the rows whose rank is any of the listed levels. `%in%` tests set
# membership row by row; `==` against a length-3 vector recycles it
# positionally (row 1 vs "Prof", row 2 vs "AsstProf", ...) and therefore
# keeps an arbitrary subset of the rows.
salary1 <- Salaries[Salaries$rank %in% c("Prof", "AsstProf", "AssocProf"), ]
str(salary1)
# NOTE(review): the output previously printed here (134 obs.) came from the
# recycled comparison; re-knit the document to regenerate it.
# Keep the rows whose discipline is any of the listed levels. `%in%` tests
# set membership row by row; `==` against a length-2 vector recycles it
# positionally (odd rows vs "A", even rows vs "B") and therefore keeps an
# arbitrary subset of the rows.
salary1 <- Salaries[Salaries$discipline %in% c("A", "B"), ]
str(salary1)
# Three-way contingency table: sex (rows) by rank (columns), one layer per
# discipline.
table(salary1$sex, salary1$rank, salary1$discipline)
# NOTE(review): the outputs previously printed here (201 obs. and the table
# built from them) came from the recycled comparison; re-knit the document to
# regenerate them.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Boxplots of salary for each sex, with one box per academic rank (colour).
# The x axis maps `sex`, so it is labelled "Sex"; rank is identified by the
# colour legend.
ggplot(Salaries, aes(x = sex, y = salary, colour = rank)) +
  geom_boxplot() +
  ggtitle("Boxplot of Salary by Sex and Rank") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(color = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(color = "blue", size = 14, face = "bold")
  ) +
  xlab("Sex") +
  ylab("Salary")
# No second attach(): the calls below reference `Salaries` explicitly, and
# attaching the same data frame again only adds a duplicate search-path entry
# and masking messages.
#
# Test whether the proportion of the first discipline level ("A") is equal
# across the three ranks. Columns are selected by name rather than by
# position so the call does not depend on the column order of the data frame.
prop.test(table(Salaries[c("rank", "discipline")]))
##
## 3-sample test for equality of proportions without continuity correction
##
## data: table(Salaries[c("rank", "discipline")])
## X-squared = 4.6487, df = 2, p-value = 0.09785
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.3582090 0.4062500 0.4924812
No se rechaza la hipótesis de igualdad de proporciones (p-valor = 0.098 > 0.05).
# Test whether the proportion of the first sex level ("Female") is equal
# across the three ranks. Columns are selected by name rather than by
# position so the call does not depend on the column order of the data frame.
prop.test(table(Salaries[c("rank", "sex")]))
##
## 3-sample test for equality of proportions without continuity correction
##
## data: table(Salaries[c("rank", "sex")])
## X-squared = 8.5259, df = 2, p-value = 0.01408
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.16417910 0.15625000 0.06766917
Se rechaza la hipótesis de igualdad de proporciones (p-valor = 0.014 < 0.05).
# Test whether the proportion of the first sex level ("Female") is equal in
# both disciplines. Columns are selected by name rather than by position so
# the call does not depend on the column order of the data frame.
prop.test(table(Salaries[c("discipline", "sex")]))
##
## 2-sample test for equality of proportions with continuity correction
##
## data: table(Salaries[c("discipline", "sex")])
## X-squared = 2.7708e-30, df = 1, p-value = 1
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.05883604 0.06328663
## sample estimates:
## prop 1 prop 2
## 0.09944751 0.09722222
No se rechaza la hipótesis de igualdad de proporciones (p-valor = 1 > 0.05).
library(nortest)
library(stats)
# Three-way factorial ANOVA of salary on rank, discipline and sex, including
# every two-way and three-way interaction. Variables are looked up through
# `data = Salaries` instead of being written as `Salaries$...` inside the
# formula, which keeps the term labels short and the model tied to its data.
ANOVA <- aov(salary ~ rank * discipline * sex, data = Salaries)
summary(ANOVA)
##                      Df    Sum Sq   Mean Sq F value   Pr(>F)
## rank                  2 1.432e+11 7.162e+10 138.105  < 2e-16 ***
## discipline            1 1.843e+10 1.843e+10  35.540 5.65e-09 ***
## sex                   1 6.941e+08 6.941e+08   1.338    0.248
## rank:discipline       2 5.259e+08 2.629e+08   0.507    0.603
## rank:sex              2 1.780e+08 8.900e+07   0.172    0.842
## discipline:sex        1 4.620e+08 4.620e+08   0.891    0.346
## rank:discipline:sex   2 1.324e+08 6.620e+07   0.128    0.880
## Residuals           385 1.996e+11 5.186e+08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract the model residuals, draw the standard diagnostic plots for the
# fitted model, and test the residuals for normality (Shapiro-Wilk).
resids <- residuals(ANOVA)
plot(ANOVA)
shapiro.test(resids)
##
## Shapiro-Wilk normality test
##
## data: resids
## W = 0.96133, p-value = 1.011e-08
Los residuos no siguen una distribución normal, ya que el p-valor (1.011e-08) es menor que 0.05.
# Non-parametric alternative to the one-way ANOVA: compare the salary
# distribution across the levels of rank.
kruskal.test(salary ~ rank, data = Salaries)
##
## Kruskal-Wallis rank sum test
##
## data: salary by rank
## Kruskal-Wallis chi-squared = 194.01, df = 2, p-value < 2.2e-16
# Same comparison across the levels of discipline.
kruskal.test(salary ~ discipline, data = Salaries)
##
## Kruskal-Wallis rank sum test
##
## data: salary by discipline
## Kruskal-Wallis chi-squared = 11.36, df = 1, p-value = 0.0007504
Ambos contrastes son significativos, dado que sus p-valores son menores que 0.05: el salario difiere según el rango y según la disciplina.
# Record the session details and list, for every loaded package, its name,
# loaded version and installation source.
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info[["packages"]]),
  package, loadedversion, source
)
## # A tibble: 68 × 3
## package loadedversion source
## <chr> <chr> <chr>
## 1 bslib 0.4.2 CRAN (R 4.2.2)
## 2 cachem 1.0.6 CRAN (R 4.2.2)
## 3 callr 3.7.3 CRAN (R 4.2.3)
## 4 carData 3.0-5 CRAN (R 4.2.3)
## 5 cli 3.6.0 CRAN (R 4.2.2)
## 6 colorspace 2.1-0 CRAN (R 4.2.3)
## 7 crayon 1.5.2 CRAN (R 4.2.3)
## 8 devtools 2.4.5 CRAN (R 4.2.3)
## 9 digest 0.6.31 CRAN (R 4.2.2)
## 10 dplyr 1.1.2 CRAN (R 4.2.3)
## # ℹ 58 more rows