library(ggplot2)
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
library(rstatix)
##
## Dołączanie pakietu: 'rstatix'
## Następujący obiekt został zakryty z 'package:stats':
##
## filter
library(dplyr)
##
## Dołączanie pakietu: 'dplyr'
## Następujące obiekty zostały zakryte z 'package:stats':
##
## filter, lag
## Następujące obiekty zostały zakryte z 'package:base':
##
## intersect, setdiff, setequal, union
library(carData)
data("Salaries")
ggplot(Salaries,aes(sex,salary))+geom_boxplot()
prop.test(table(Salaries[c(1,2)]))
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(Salaries[c(1, 2)])
## X-squared = 4.6487, df = 2, p-value = 0.09785
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.3582090 0.4062500 0.4924812
#zmienne rank i discipline są niezależne
prop.test(table(Salaries[c(1,5)]))
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(Salaries[c(1, 5)])
## X-squared = 8.5259, df = 2, p-value = 0.01408
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.16417910 0.15625000 0.06766917
#zmienne rank i sex są zależne
prop.test(table(Salaries[c(2,5)]))
##
## 2-sample test for equality of proportions with continuity correction
##
## data: table(Salaries[c(2, 5)])
## X-squared = 2.7708e-30, df = 1, p-value = 1
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.05883604 0.06328663
## sample estimates:
## prop 1 prop 2
## 0.09944751 0.09722222
#zmienne discipline i sex są niezależne
Salaries %>%
group_by(rank,discipline,sex) %>%
shapiro_test(salary)
## # A tibble: 12 x 6
## rank discipline sex variable statistic p
## <fct> <fct> <fct> <chr> <dbl> <dbl>
## 1 AsstProf A Female salary 0.870 0.226
## 2 AsstProf A Male salary 0.941 0.300
## 3 AsstProf B Female salary 0.889 0.354
## 4 AsstProf B Male salary 0.941 0.0458
## 5 AssocProf A Female salary 0.863 0.269
## 6 AssocProf A Male salary 0.878 0.0113
## 7 AssocProf B Female salary 0.635 0.00117
## 8 AssocProf B Male salary 0.967 0.416
## 9 Prof A Female salary 0.934 0.549
## 10 Prof A Male salary 0.952 0.000259
## 11 Prof B Female salary 0.974 0.923
## 12 Prof B Male salary 0.978 0.0435
rozkłady nie są normalne
usunięcie wartości odstających
w.odstajace <- Salaries %>%
group_by(rank,discipline,sex) %>%
identify_outliers(salary)
w.odstajace
## # A tibble: 18 x 8
## rank discipline sex yrs.since.phd yrs.service salary is.outlier is.extreme
## <fct> <fct> <fct> <int> <int> <int> <lgl> <lgl>
## 1 Asst~ A Fema~ 7 6 63100 TRUE FALSE
## 2 Asst~ A Male 3 1 63900 TRUE FALSE
## 3 Asst~ A Male 2 0 85000 TRUE TRUE
## 4 Asst~ A Male 8 4 81035 TRUE FALSE
## 5 Asso~ A Fema~ 25 22 62884 TRUE FALSE
## 6 Asso~ A Male 14 8 100102 TRUE FALSE
## 7 Asso~ A Male 9 7 70000 TRUE FALSE
## 8 Asso~ A Male 11 1 104800 TRUE FALSE
## 9 Asso~ A Male 45 39 70700 TRUE FALSE
## 10 Asso~ A Male 10 1 108413 TRUE FALSE
## 11 Asso~ A Male 11 8 104121 TRUE FALSE
## 12 Asso~ B Fema~ 14 7 109650 TRUE TRUE
## 13 Asso~ B Fema~ 12 9 71065 TRUE TRUE
## 14 Asso~ B Male 13 11 126431 TRUE FALSE
## 15 Prof A Male 29 7 204000 TRUE FALSE
## 16 Prof A Male 42 18 194800 TRUE FALSE
## 17 Prof A Male 43 43 205500 TRUE FALSE
## 18 Prof B Male 38 38 231545 TRUE FALSE
library(extraoperators)
Salaries$salary[Salaries$salary%in%w.odstajace$salary] <- NA
library(stats)
Salaries <- na.omit(Salaries)
Salaries <- mutate(Salaries,log=log10(salary))
Salaries %>%
group_by(rank,discipline,sex) %>%
shapiro_test(log)
## # A tibble: 12 x 6
## rank discipline sex variable statistic p
## <fct> <fct> <fct> <chr> <dbl> <dbl>
## 1 AsstProf A Female log 0.813 0.104
## 2 AsstProf A Male log 0.952 0.560
## 3 AsstProf B Female log 0.896 0.387
## 4 AsstProf B Male log 0.931 0.0218
## 5 AssocProf A Female log 0.978 0.717
## 6 AssocProf A Male log 0.891 0.0587
## 7 AssocProf B Female log 0.916 0.517
## 8 AssocProf B Male log 0.981 0.840
## 9 Prof A Female log 0.936 0.575
## 10 Prof A Male log 0.985 0.212
## 11 Prof B Female log 0.976 0.939
## 12 Prof B Male log 0.986 0.236
rozkłady są normalne
test ancova
Salaries %>%
anova_test(log~rank*discipline*sex)
## Coefficient covariances computed by hccm()
## ANOVA Table (type II tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 rank 2 366 189.135 3.89e-57 * 0.508000
## 2 discipline 1 366 57.685 2.59e-13 * 0.136000
## 3 sex 1 366 0.312 5.77e-01 0.000851
## 4 rank:discipline 2 366 1.527 2.19e-01 0.008000
## 5 rank:sex 2 366 0.089 9.15e-01 0.000486
## 6 discipline:sex 1 366 0.713 3.99e-01 0.002000
## 7 rank:discipline:sex 2 366 0.315 7.30e-01 0.002000
Salaries %>%
tukey_hsd(log~rank)
## # A tibble: 3 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 rank AsstProf AssocProf 0 0.0620 0.0270 0.0971 0.000113
## 2 rank AsstProf Prof 0 0.182 0.155 0.208 0
## 3 rank AssocProf Prof 0 0.120 0.0915 0.148 0
## # ... with 1 more variable: p.adj.signif <chr>
#każda grupa różni się istotnie między sobą
Salaries %>%
tukey_hsd(log~discipline)
## # A tibble: 1 x 9
## term group1 group2 null.value estimate conf.low conf.high p.adj p.adj.signif
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 disc~ A B 0 0.0369 0.0152 0.0587 9.3e-4 ***
#grupy różnią się między sobą
ggplot(Salaries,aes(yrs.since.phd,salary))+geom_point()
Salaries %>%
cor_test(yrs.since.phd,salary,method="pearson")
## # A tibble: 1 x 8
## var1 var2 cor statistic p conf.low conf.high method
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 yrs.since.phd salary 0.4 8.53 3.76e-16 0.314 0.484 Pearson
ggplot(Salaries,aes(yrs.service,salary))+geom_point()
Salaries %>%
cor_test(yrs.service,salary,method="pearson")
## # A tibble: 1 x 8
## var1 var2 cor statistic p conf.low conf.high method
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 yrs.service salary 0.33 6.72 6.57e-11 0.235 0.415 Pearson
w obu przypadkach zmienne są skorelowane
ggplot(Salaries,aes(yrs.since.phd,salary,color=rank))+geom_point()
Salaries %>%
group_by(rank) %>%
cor_test(yrs.since.phd,salary,method="pearson")
## # A tibble: 3 x 9
## rank var1 var2 cor statistic p conf.low conf.high method
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 AsstProf yrs.since.phd sala~ -0.17 -1.33 0.188 -0.399 0.0830 Pears~
## 2 AssocProf yrs.since.phd sala~ -0.23 -1.70 0.0957 -0.468 0.0412 Pears~
## 3 Prof yrs.since.phd sala~ -0.05 -0.809 0.419 -0.171 0.0716 Pears~
ggplot(Salaries,aes(yrs.service,salary,color=rank))+geom_point()
Salaries %>%
group_by(rank) %>%
cor_test(yrs.service,salary,method="pearson")
## # A tibble: 3 x 9
## rank var1 var2 cor statistic p conf.low conf.high method
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 AsstProf yrs.service salary 0.24 1.95 0.0554 -0.00546 0.463 Pears~
## 2 AssocProf yrs.service salary -0.21 -1.55 0.128 -0.452 0.0617 Pears~
## 3 Prof yrs.service salary -0.077 -1.24 0.215 -0.197 0.0449 Pears~
w obu przypadkach zmienne nie są skorelowane