# The spreadsheet viewer and the help page are console aids, so open them
# only in an interactive session; a non-interactive run (Rscript, knitting)
# skips them instead of depending on a viewer being available.
if (interactive()) {
  View(airquality)
  help("airquality")
}
# View the measure summary
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# Arithmetic mean of the Wind column.
mean(airquality[["Wind"]])
## [1] 9.957516
# Ozone has missing values, so drop the NAs before averaging.
mean(airquality[["Ozone"]], na.rm = TRUE)
## [1] 42.12931
# Weighted mean: use R as a calculator.
# Median of the Wind column.
median(airquality[["Wind"]])
## [1] 9.7
# R has no built-in function for the statistical mode (base::mode() reports
# an object's storage mode instead), so we define our own, "get_mode".
#
# Returns the most frequent non-missing value of `x`, or every tied value
# when several share the highest count.
#
# Args:
#   x: A vector. Missing values are ignored.
#
# Returns:
#   The modal value(s), in the order table() lists them. Numeric input gives
#   a numeric result; any other input (character, factor, logical) gives the
#   modal values as character labels. A zero-length vector is returned when
#   `x` has no non-missing values.
get_mode <- function(x) {
  # Work on the labels of a factor so the result is not its integer codes.
  if (is.factor(x)) {
    x <- as.character(x)
  }
  x <- x[!is.na(x)]
  # Nothing to tabulate: avoid calling max() on an empty table.
  if (length(x) == 0L) {
    return(x)
  }
  freq_table <- table(x)
  modal_values <- names(freq_table)[freq_table == max(freq_table)]
  # table() stores the values as character names; convert back only when
  # the input was numeric, otherwise the labels themselves are the answer.
  if (is.numeric(x)) {
    as.numeric(modal_values)
  } else {
    modal_values
  }
}
# Apply "get_mode" to individual columns to obtain their statistical modes:
get_mode(airquality[["Wind"]])
## [1] 11.5
get_mode(airquality[["Temp"]])
## [1] 81
# Five-number summary plus the mean of the Temp column.
summary(airquality[["Temp"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 56.00 72.00 79.00 77.88 85.00 97.00
# Quartiles of Temp.
quantile(airquality[["Temp"]])
## 0% 25% 50% 75% 100%
## 56 72 79 85 97
# Largest and smallest Temp values.
max(airquality[["Temp"]])
## [1] 97
min(airquality[["Temp"]])
## [1] 56
# Spread of Temp: standard deviation and variance.
sd_temp <- sd(airquality[["Temp"]])
var_temp <- var(airquality[["Temp"]])
sd_temp
## [1] 9.46527
var_temp
## [1] 89.59133
# Squaring the standard deviation reproduces the variance.
sd_temp^2
## [1] 89.59133
# Load the tidyverse so that ggplot() is available for the plot below. The
# attach message recorded underneath appears to be this call's output; the
# call itself was missing from the script.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Boxplot of Ozone by month; Month is converted to a factor so that each
# month gets its own box rather than being treated as a continuous axis.
ggplot(data = airquality, aes(x = as.factor(Month), y = Ozone)) +
  geom_boxplot(color = "red", fill = "orange", alpha = 0.2)
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_boxplot()`).