# Install the pacman package manager if it cannot be loaded
if (!require("pacman")) {
  install.packages("pacman")
}
## Loading required package: pacman
# Every package required by the analysis must be listed below
pacman::p_load(
  boot,
  lmPerm,
  tidyverse,
  ggplot2,
  ggpubr
)
# Read the weather observations from the CSV file in the working directory
weather <- read_csv(file = "weatherAUS.csv")
## Rows: 145460 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Location, WindGustDir, WindDir9am, WindDir3pm, RainToday, RainTom...
## dbl (16): MinTemp, MaxTemp, Rainfall, Evaporation, Sunshine, WindGustSpeed,...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the distinct values of the Location column
weather %>%
  pull(Location) %>%
  unique()
## [1] "Albury" "BadgerysCreek" "Cobar" "CoffsHarbour"
## [5] "Moree" "Newcastle" "NorahHead" "NorfolkIsland"
## [9] "Penrith" "Richmond" "Sydney" "SydneyAirport"
## [13] "WaggaWagga" "Williamtown" "Wollongong" "Canberra"
## [17] "Tuggeranong" "MountGinini" "Ballarat" "Bendigo"
## [21] "Sale" "MelbourneAirport" "Melbourne" "Mildura"
## [25] "Nhil" "Portland" "Watsonia" "Dartmoor"
## [29] "Brisbane" "Cairns" "GoldCoast" "Townsville"
## [33] "Adelaide" "MountGambier" "Nuriootpa" "Woomera"
## [37] "Albany" "Witchcliffe" "PearceRAAF" "PerthAirport"
## [41] "Perth" "SalmonGums" "Walpole" "Hobart"
## [45] "Launceston" "AliceSprings" "Darwin" "Katherine"
## [49] "Uluru"
Выберем несколько локаций и построим по ним коробчатые диаграммы (boxplot) количества осадков.
# Locations compared on the box plot
locations <- c("Albury", "BadgerysCreek", "Melbourne", "Walpole", "Cairns")

# Keep rows with both Rainfall and Location present, restrict them to the
# chosen locations and draw a random 1% subsample.
# slice_sample(prop = ) is the current replacement for the superseded
# sample_frac().
weather_f <- weather %>%
  select(Rainfall, Location) %>%
  drop_na() %>%
  filter(Location %in% locations) %>%
  slice_sample(prop = 0.01)

# Box plot of Rainfall for each location
gr_weather <- ggplot(data = weather_f, aes(x = Location, y = Rainfall)) +
  geom_boxplot() +
  # Optional layers, currently disabled:
  # geom_jitter(width = 0.25) +
  # stat_compare_means() +
  xlab("Место")
gr_weather
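Данных очень много, среди них много выбросов, поэтому диаграммы получаются сплюснутыми и по ним ничего не понятно. Уберём выбросы по правилу 1,5·IQR отдельно для каждой локации и построим диаграммы заново.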
# Locations compared on the box plot
locations <- c("Albury", "BadgerysCreek", "Melbourne", "Walpole", "Cairns")
# locations <- c("Walpole")

# Keep rows with both Rainfall and Location present, restrict them to the
# chosen locations and draw a random 1% subsample.
# slice_sample(prop = ) is the current replacement for the superseded
# sample_frac().
weather_b <- weather %>%
  select(Rainfall, Location) %>%
  drop_na() %>%
  filter(Location %in% locations) %>%
  slice_sample(prop = 0.01)

# Within each location, drop observations outside the Tukey fences
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
weather_c <- weather_b %>%
  group_by(Location) %>%
  filter(
    Rainfall >= quantile(Rainfall, 0.25) - 1.5 * IQR(Rainfall) &
      Rainfall <= quantile(Rainfall, 0.75) + 1.5 * IQR(Rainfall)
  ) %>%
  ungroup()
# Alternative rule that was tried (median +/- 2 standard deviations):
# cleaned_data <- weather_b %>%
#   filter(!(abs(Rainfall - median(Rainfall)) > 2 * sd(Rainfall)))

# Box plot with the individual observations overlaid.
gr_weather <- ggplot(
  data = weather_c,
  aes(x = Location, y = Rainfall, color = Location)
) +
  # The jitter layer already draws every observation, so the box plot's own
  # outlier markers are hidden to avoid plotting those points twice.
  geom_boxplot(outlier.shape = NA) +
  # height = 0: jitter only horizontally so the plotted Rainfall values are
  # not shifted by random vertical noise.
  geom_jitter(width = 0.25, height = 0) +
  xlab("Место")
gr_weather