library(ggplot2)
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
library(rstatix)
## 
## Dołączanie pakietu: 'rstatix'
## Następujący obiekt został zakryty z 'package:stats':
## 
##     filter
library(dplyr)
## 
## Dołączanie pakietu: 'dplyr'
## Następujące obiekty zostały zakryte z 'package:stats':
## 
##     filter, lag
## Następujące obiekty zostały zakryte z 'package:base':
## 
##     intersect, setdiff, setequal, union
library(carData)
# Load the Salaries data set into the workspace.
data("Salaries")
# Box plot of salary for each level of sex.
ggplot(data = Salaries, mapping = aes(x = sex, y = salary)) +
  geom_boxplot()

# Test of equal proportions on the rank x discipline contingency table.
# Columns are selected by name rather than by position so the call keeps
# working (and stays readable) if the column order of Salaries changes.
prop.test(table(Salaries[c("rank", "discipline")]))
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(Salaries[c(1, 2)])
## X-squared = 4.6487, df = 2, p-value = 0.09785
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.3582090 0.4062500 0.4924812
#zmienne rank i discipline są niezależne

# Test of equal proportions on the rank x sex contingency table.
# Columns are selected by name rather than by position so the call keeps
# working (and stays readable) if the column order of Salaries changes.
prop.test(table(Salaries[c("rank", "sex")]))
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(Salaries[c(1, 5)])
## X-squared = 8.5259, df = 2, p-value = 0.01408
## alternative hypothesis: two.sided
## sample estimates:
##     prop 1     prop 2     prop 3 
## 0.16417910 0.15625000 0.06766917
#zmienne rank i sex są zależne

# Test of equal proportions on the discipline x sex contingency table.
# Columns are selected by name rather than by position so the call keeps
# working (and stays readable) if the column order of Salaries changes.
prop.test(table(Salaries[c("discipline", "sex")]))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  table(Salaries[c(2, 5)])
## X-squared = 2.7708e-30, df = 1, p-value = 1
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.05883604  0.06328663
## sample estimates:
##     prop 1     prop 2 
## 0.09944751 0.09722222
#zmienne discipline i sex są niezależne
# Shapiro-Wilk normality test of salary within every
# rank x discipline x sex cell.
shapiro_test(
  group_by(Salaries, rank, discipline, sex),
  salary
)
## # A tibble: 12 x 6
##    rank      discipline sex    variable statistic        p
##    <fct>     <fct>      <fct>  <chr>        <dbl>    <dbl>
##  1 AsstProf  A          Female salary       0.870 0.226   
##  2 AsstProf  A          Male   salary       0.941 0.300   
##  3 AsstProf  B          Female salary       0.889 0.354   
##  4 AsstProf  B          Male   salary       0.941 0.0458  
##  5 AssocProf A          Female salary       0.863 0.269   
##  6 AssocProf A          Male   salary       0.878 0.0113  
##  7 AssocProf B          Female salary       0.635 0.00117 
##  8 AssocProf B          Male   salary       0.967 0.416   
##  9 Prof      A          Female salary       0.934 0.549   
## 10 Prof      A          Male   salary       0.952 0.000259
## 11 Prof      B          Female salary       0.974 0.923   
## 12 Prof      B          Male   salary       0.978 0.0435

Rozkłady wynagrodzeń nie we wszystkich grupach są normalne: test Shapiro-Wilka odrzuca normalność (p < 0,05) w 5 z 12 grup.

usunięcie wartości odstających

# Flag salary outliers separately within each rank x discipline x sex cell
# and keep the flagged rows for the removal step that follows.
w.odstajace <- identify_outliers(
  group_by(Salaries, rank, discipline, sex),
  salary
)
w.odstajace
## # A tibble: 18 x 8
##    rank  discipline sex   yrs.since.phd yrs.service salary is.outlier is.extreme
##    <fct> <fct>      <fct>         <int>       <int>  <int> <lgl>      <lgl>     
##  1 Asst~ A          Fema~             7           6  63100 TRUE       FALSE     
##  2 Asst~ A          Male              3           1  63900 TRUE       FALSE     
##  3 Asst~ A          Male              2           0  85000 TRUE       TRUE      
##  4 Asst~ A          Male              8           4  81035 TRUE       FALSE     
##  5 Asso~ A          Fema~            25          22  62884 TRUE       FALSE     
##  6 Asso~ A          Male             14           8 100102 TRUE       FALSE     
##  7 Asso~ A          Male              9           7  70000 TRUE       FALSE     
##  8 Asso~ A          Male             11           1 104800 TRUE       FALSE     
##  9 Asso~ A          Male             45          39  70700 TRUE       FALSE     
## 10 Asso~ A          Male             10           1 108413 TRUE       FALSE     
## 11 Asso~ A          Male             11           8 104121 TRUE       FALSE     
## 12 Asso~ B          Fema~            14           7 109650 TRUE       TRUE      
## 13 Asso~ B          Fema~            12           9  71065 TRUE       TRUE      
## 14 Asso~ B          Male             13          11 126431 TRUE       FALSE     
## 15 Prof  A          Male             29           7 204000 TRUE       FALSE     
## 16 Prof  A          Male             42          18 194800 TRUE       FALSE     
## 17 Prof  A          Male             43          43 205500 TRUE       FALSE     
## 18 Prof  B          Male             38          38 231545 TRUE       FALSE
library(extraoperators)
library(stats)
# Remove exactly the rows flagged in w.odstajace. The join matches on every
# column the two tables share (grouping factors included), so a row from a
# different rank/discipline/sex cell that merely has the same salary value as
# a flagged row is kept. Matching on the salary value alone could drop such
# non-outlier rows as well.
# NOTE(review): results printed further down were produced with the old
# value-based removal; re-run to refresh them.
Salaries <- anti_join(
  Salaries,
  w.odstajace,
  by = intersect(names(Salaries), names(w.odstajace))
)
# Drop any remaining rows with missing values, as the original step did.
Salaries <- na.omit(Salaries)
# log10(salary) is stored in column `log`, which later steps refer to by name.
Salaries <- mutate(Salaries, log = log10(salary))


# Shapiro-Wilk normality test of the log10 salary within every
# rank x discipline x sex cell.
shapiro_test(
  group_by(Salaries, rank, discipline, sex),
  log
)
## # A tibble: 12 x 6
##    rank      discipline sex    variable statistic      p
##    <fct>     <fct>      <fct>  <chr>        <dbl>  <dbl>
##  1 AsstProf  A          Female log          0.813 0.104 
##  2 AsstProf  A          Male   log          0.952 0.560 
##  3 AsstProf  B          Female log          0.896 0.387 
##  4 AsstProf  B          Male   log          0.931 0.0218
##  5 AssocProf A          Female log          0.978 0.717 
##  6 AssocProf A          Male   log          0.891 0.0587
##  7 AssocProf B          Female log          0.916 0.517 
##  8 AssocProf B          Male   log          0.981 0.840 
##  9 Prof      A          Female log          0.936 0.575 
## 10 Prof      A          Male   log          0.985 0.212 
## 11 Prof      B          Female log          0.976 0.939 
## 12 Prof      B          Male   log          0.986 0.236

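Po usunięciu wartości odstających i transformacji logarytmicznej rozkłady są w przybliżeniu normalne (wyjątek: grupa AsstProf, B, Male, p = 0,0218).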

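trójczynnikowa analiza wariancji (ANOVA): rank × discipline × sex (model nie zawiera zmiennej towarzyszącej, więc nie jest to ANCOVA)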

# Three-way factorial ANOVA of log10 salary on rank, discipline and sex,
# including all two-way and three-way interactions.
anova_test(Salaries, log ~ rank * discipline * sex)
## Coefficient covariances computed by hccm()
## ANOVA Table (type II tests)
## 
##                Effect DFn DFd       F        p p<.05      ges
## 1                rank   2 366 189.135 3.89e-57     * 0.508000
## 2          discipline   1 366  57.685 2.59e-13     * 0.136000
## 3                 sex   1 366   0.312 5.77e-01       0.000851
## 4     rank:discipline   2 366   1.527 2.19e-01       0.008000
## 5            rank:sex   2 366   0.089 9.15e-01       0.000486
## 6      discipline:sex   1 366   0.713 3.99e-01       0.002000
## 7 rank:discipline:sex   2 366   0.315 7.30e-01       0.002000
# Tukey HSD pairwise comparisons of log10 salary between ranks.
tukey_hsd(Salaries, log ~ rank)
## # A tibble: 3 x 9
##   term  group1    group2    null.value estimate conf.low conf.high    p.adj
## * <chr> <chr>     <chr>          <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
## 1 rank  AsstProf  AssocProf          0   0.0620   0.0270    0.0971 0.000113
## 2 rank  AsstProf  Prof               0   0.182    0.155     0.208  0       
## 3 rank  AssocProf Prof               0   0.120    0.0915    0.148  0       
## # ... with 1 more variable: p.adj.signif <chr>
# każda para stopni (rank) różni się od siebie istotnie

# Tukey HSD comparison of log10 salary between the two disciplines.
tukey_hsd(Salaries, log ~ discipline)
## # A tibble: 1 x 9
##   term  group1 group2 null.value estimate conf.low conf.high  p.adj p.adj.signif
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>  <dbl> <chr>       
## 1 disc~ A      B               0   0.0369   0.0152    0.0587 9.3e-4 ***
#grupy różnią się między sobą
# Scatter plot of salary against years since PhD.
ggplot(data = Salaries, mapping = aes(x = yrs.since.phd, y = salary)) +
  geom_point()

# Pearson correlation between years since PhD and salary (whole sample).
cor_test(Salaries, yrs.since.phd, salary, method = "pearson")
## # A tibble: 1 x 8
##   var1          var2     cor statistic        p conf.low conf.high method 
##   <chr>         <chr>  <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>  
## 1 yrs.since.phd salary   0.4      8.53 3.76e-16    0.314     0.484 Pearson
# Scatter plot of salary against years of service.
ggplot(data = Salaries, mapping = aes(x = yrs.service, y = salary)) +
  geom_point()

# Pearson correlation between years of service and salary (whole sample).
cor_test(Salaries, yrs.service, salary, method = "pearson")
## # A tibble: 1 x 8
##   var1        var2     cor statistic        p conf.low conf.high method 
##   <chr>       <chr>  <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>  
## 1 yrs.service salary  0.33      6.72 6.57e-11    0.235     0.415 Pearson

W obu przypadkach zmienne są dodatnio i istotnie statystycznie skorelowane (r = 0,40 oraz r = 0,33; p < 0,001).

# Scatter plot of salary against years since PhD, points coloured by rank.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.since.phd, y = salary, color = rank)
) +
  geom_point()

# Pearson correlation between years since PhD and salary within each rank.
cor_test(
  group_by(Salaries, rank),
  yrs.since.phd, salary,
  method = "pearson"
)
## # A tibble: 3 x 9
##   rank      var1          var2    cor statistic      p conf.low conf.high method
##   <fct>     <chr>         <chr> <dbl>     <dbl>  <dbl>    <dbl>     <dbl> <chr> 
## 1 AsstProf  yrs.since.phd sala~ -0.17    -1.33  0.188    -0.399    0.0830 Pears~
## 2 AssocProf yrs.since.phd sala~ -0.23    -1.70  0.0957   -0.468    0.0412 Pears~
## 3 Prof      yrs.since.phd sala~ -0.05    -0.809 0.419    -0.171    0.0716 Pears~
# Scatter plot of salary against years of service, points coloured by rank.
ggplot(
  data = Salaries,
  mapping = aes(x = yrs.service, y = salary, color = rank)
) +
  geom_point()

# Pearson correlation between years of service and salary within each rank.
cor_test(
  group_by(Salaries, rank),
  yrs.service, salary,
  method = "pearson"
)
## # A tibble: 3 x 9
##   rank      var1        var2      cor statistic      p conf.low conf.high method
##   <fct>     <chr>       <chr>   <dbl>     <dbl>  <dbl>    <dbl>     <dbl> <chr> 
## 1 AsstProf  yrs.service salary  0.24       1.95 0.0554 -0.00546    0.463  Pears~
## 2 AssocProf yrs.service salary -0.21      -1.55 0.128  -0.452      0.0617 Pears~
## 3 Prof      yrs.service salary -0.077     -1.24 0.215  -0.197      0.0449 Pears~

W obu przypadkach, w obrębie poszczególnych stopni (rank), korelacje nie są istotne statystycznie (p > 0,05 w każdej grupie).