Mean, Median, Mode, Five-number Summary, Variance and Standard Deviation

# Open the dataset in the spreadsheet-style viewer. View() needs a GUI, so it
# is skipped when the script runs non-interactively (e.g. Rscript or knitting).
if (interactive()) {
  View(airquality)
}
# Show the documentation for the built-in airquality dataset
help("airquality")

# Summarise every column: min, quartiles, median, mean, max, and NA count
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 

Mean

# Arithmetic mean of Wind
mean(airquality$Wind)
## [1] 9.957516
mean(airquality$Ozone, na.rm = TRUE) # Ozone has NAs: drop them first, otherwise the result is NA
## [1] 42.12931
# Weighted mean: use weighted.mean(x, w), or compute sum(w * x) / sum(w) by hand.

Median

# Wind has no missing values (see the summary above), so na.rm is not needed
median(airquality$Wind)
## [1] 9.7

Mode

# R has no built-in function for the statistical mode (base mode() returns an
# object's storage mode instead), so we write our own and name it "get_mode":
#' Statistical mode(s) of a vector
#'
#' Tabulates the non-missing values of `x` and returns every value that occurs
#' with the highest frequency, so ties yield more than one mode.
#'
#' @param x An atomic vector or factor. `NA`s are ignored.
#' @return For numeric `x`, a numeric vector holding the mode(s); otherwise a
#'   character vector with the labels of the most frequent values. A
#'   zero-length vector when `x` has no non-missing values.
get_mode <- function(x) {
  x <- x[!is.na(x)]
  # Nothing to tabulate: return an empty result instead of letting max()
  # warn about an empty argument.
  if (length(x) == 0L) {
    return(if (is.numeric(x)) numeric(0) else character(0))
  }
  freq_table <- table(x)
  modes <- names(freq_table)[as.vector(freq_table) == max(freq_table)]
  # table() keeps the distinct values as character names; convert them back
  # only for numeric input so character/factor input does not turn into NA.
  if (is.numeric(x)) as.numeric(modes) else modes
}

# Then we can use "get_mode" to get the statistical modes of variables
# (it returns every value tied for the highest frequency):
get_mode(airquality$Wind)
## [1] 11.5
get_mode(airquality$Temp)
## [1] 81

Five-number Summary

# summary() prints the five-number summary plus the mean
summary(airquality$Temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   56.00   72.00   79.00   77.88   85.00   97.00
# quantile() with its default probabilities gives exactly the five numbers:
# minimum, first quartile, median, third quartile, maximum
quantile(airquality$Temp)
##   0%  25%  50%  75% 100% 
##   56   72   79   85   97
# The two extremes can also be obtained individually
max(airquality$Temp)
## [1] 97
min(airquality$Temp)
## [1] 56

Variance and Standard Deviation

# Sample variance and standard deviation of the Temp column
var_temp <- with(airquality, var(Temp))
sd_temp <- with(airquality, sd(Temp))
sd_temp
## [1] 9.46527
var_temp
## [1] 89.59133
# Squaring the standard deviation recovers the variance
sd_temp^2
## [1] 89.59133

Boxplot

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the tidyverse, which attaches ggplot2 (needed for ggplot() below)
library(tidyverse)

# Boxplot of Ozone for each month; Month is converted to a factor so that
# ggplot2 draws one box per month instead of treating it as continuous
ggplot(data = airquality, aes(x = as.factor(Month), y = Ozone)) +
  geom_boxplot(color = "red", fill = "orange", alpha = 0.2)
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_boxplot()`).