library(MASS)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:janitor':
## 
##     make_clean_names
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
boston_data <- Boston

colnames(boston_data) <- tolower(colnames(boston_data))  

head(boston_data, 7)
##      crim   zn indus chas   nox    rm  age    dis rad tax ptratio  black lstat
## 1 0.00632 18.0  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0.0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0.0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0.0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0.0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 6 0.02985  0.0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
## 7 0.08829 12.5  7.87    0 0.524 6.012 66.6 5.5605   5 311    15.2 395.60 12.43
##   medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
## 7 22.9
summary(boston_data)
##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08205   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          black       
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   :  0.32  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.:375.38  
##  Median : 5.000   Median :330.0   Median :19.05   Median :391.44  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :356.67  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:396.23  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :396.90  
##      lstat            medv      
##  Min.   : 1.73   Min.   : 5.00  
##  1st Qu.: 6.95   1st Qu.:17.02  
##  Median :11.36   Median :21.20  
##  Mean   :12.65   Mean   :22.53  
##  3rd Qu.:16.95   3rd Qu.:25.00  
##  Max.   :37.97   Max.   :50.00
str(boston_data)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
boston_data$rad <- as.factor(boston_data$rad)
boston_data$zn <- as.factor(boston_data$zn)

tabyl(boston_data, rad)
##  rad   n    percent
##    1  20 0.03952569
##    2  24 0.04743083
##    3  38 0.07509881
##    4 110 0.21739130
##    5 115 0.22727273
##    6  26 0.05138340
##    7  17 0.03359684
##    8  24 0.04743083
##   24 132 0.26086957
tabyl(boston_data, zn)
##    zn   n     percent
##     0 372 0.735177866
##  12.5  10 0.019762846
##  17.5   1 0.001976285
##    18   1 0.001976285
##    20  21 0.041501976
##    21   4 0.007905138
##    22  10 0.019762846
##    25  10 0.019762846
##    28   3 0.005928854
##    30   6 0.011857708
##    33   4 0.007905138
##    34   3 0.005928854
##    35   3 0.005928854
##    40   7 0.013833992
##    45   6 0.011857708
##  52.5   3 0.005928854
##    55   3 0.005928854
##    60   4 0.007905138
##    70   3 0.005928854
##    75   3 0.005928854
##    80  15 0.029644269
##  82.5   2 0.003952569
##    85   2 0.003952569
##    90   5 0.009881423
##    95   4 0.007905138
##   100   1 0.001976285
tabyl(boston_data, rad, zn)
##  rad   0 12.5 17.5 18 20 21 22 25 28 30 33 34 35 40 45 52.5 55 60 70 75 80 82.5
##    1   6    0    0  1  0  0  0  0  0  0  0  0  3  2  0    0  1  2  0  0  3    0
##    2  18    0    0  0  0  0  0  0  0  0  0  0  0  0  0    0  0  0  0  0  3    2
##    3  26    0    1  0  5  0  0  0  0  0  0  0  0  0  0    0  0  0  0  3  0    0
##    4  77    3    0  0  0  4  0  4  3  0  0  0  0  5  0    0  0  2  0  0  9    0
##    5  78    7    0  0 16  0  0  0  0  0  0  0  0  0  6    0  2  0  3  0  0    0
##    6  17    0    0  0  0  0  0  0  0  6  0  0  0  0  0    3  0  0  0  0  0    0
##    7   0    0    0  0  0  0 10  0  0  0  4  3  0  0  0    0  0  0  0  0  0    0
##    8  18    0    0  0  0  0  0  6  0  0  0  0  0  0  0    0  0  0  0  0  0    0
##   24 132    0    0  0  0  0  0  0  0  0  0  0  0  0  0    0  0  0  0  0  0    0
##  85 90 95 100
##   0  2  0   0
##   1  0  0   0
##   0  1  2   0
##   1  0  2   0
##   0  2  0   1
##   0  0  0   0
##   0  0  0   0
##   0  0  0   0
##   0  0  0   0
boston_data |> 
  group_by(rm) |> 
  summarise(MeanValue = mean(medv, na.rm = TRUE), SDValue = sd(medv, na.rm = TRUE))
## # A tibble: 446 × 3
##       rm MeanValue SDValue
##    <dbl>     <dbl>   <dbl>
##  1  3.56      27.5   NA   
##  2  3.86      23.1   NA   
##  3  4.14      12.8    1.34
##  4  4.37       8.8   NA   
##  5  4.52       7     NA   
##  6  4.63      17.9   NA   
##  7  4.65      10.5   NA   
##  8  4.88      10.2   NA   
##  9  4.90      11.8   NA   
## 10  4.91      13.8   NA   
## # ℹ 436 more rows
boston_data |> 
  select(rm, medv) |> 
  get_summary_stats(type = "full")
## # A tibble: 2 × 13
##   variable     n   min   max median    q1    q3   iqr   mad  mean    sd    se
##   <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 rm         506  3.56  8.78   6.21  5.89  6.62 0.738 0.512  6.28 0.703 0.031
## 2 medv       506  5    50     21.2  17.0  25    7.98  5.93  22.5  9.20  0.409
## # ℹ 1 more variable: ci <dbl>