Шелепин Н.: Дожди в Австралии

# Install the pacman package manager on first use if it is not available yet
if (!require("pacman")) {
  install.packages("pacman")
}
## Loading required package: pacman
# Every package the analysis depends on must be listed in this call
pacman::p_load(boot, lmPerm, tidyverse, ggplot2, ggpubr)
# Read the daily weather observations from the working directory
weather <- read_csv(file = "weatherAUS.csv")
## Rows: 145460 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (6): Location, WindGustDir, WindDir9am, WindDir3pm, RainToday, RainTom...
## dbl  (16): MinTemp, MaxTemp, Rainfall, Evaporation, Sunshine, WindGustSpeed,...
## date  (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List every distinct weather station name, in order of first appearance
weather %>% pull(Location) %>% unique()
##  [1] "Albury"           "BadgerysCreek"    "Cobar"            "CoffsHarbour"    
##  [5] "Moree"            "Newcastle"        "NorahHead"        "NorfolkIsland"   
##  [9] "Penrith"          "Richmond"         "Sydney"           "SydneyAirport"   
## [13] "WaggaWagga"       "Williamtown"      "Wollongong"       "Canberra"        
## [17] "Tuggeranong"      "MountGinini"      "Ballarat"         "Bendigo"         
## [21] "Sale"             "MelbourneAirport" "Melbourne"        "Mildura"         
## [25] "Nhil"             "Portland"         "Watsonia"         "Dartmoor"        
## [29] "Brisbane"         "Cairns"           "GoldCoast"        "Townsville"      
## [33] "Adelaide"         "MountGambier"     "Nuriootpa"        "Woomera"         
## [37] "Albany"           "Witchcliffe"      "PearceRAAF"       "PerthAirport"    
## [41] "Perth"            "SalmonGums"       "Walpole"          "Hobart"          
## [45] "Launceston"       "AliceSprings"     "Darwin"           "Katherine"       
## [49] "Uluru"

Плохие графики

  • Сделаем графики без обработки данных

Выберем несколько локаций и построим по ним коробчатые диаграммы

# Locations chosen for the side-by-side comparison.
locations <- c("Albury", "BadgerysCreek", "Melbourne", "Walpole", "Cairns")
# Fix the RNG seed so the random 1% subsample (and therefore the figure)
# is identical on every run of the report.
set.seed(42)
# Keep only the two columns used by the plot, drop rows with missing values,
# restrict to the chosen locations, then take a random 1% of what is left.
weather_f <- weather %>%
  select(Rainfall, Location) %>%
  drop_na() %>%
  filter(Location %in% locations) %>%
  sample_frac(0.01)
# Box plot of rainfall per location on the unprocessed subsample.
gr_weather <- ggplot(
  data = weather_f,
  aes(x = Location, y = Rainfall)
) +
  geom_boxplot() +
  # geom_jitter(width = 0.25) +
  # stat_compare_means() +
  xlab("Место")

gr_weather

Данных очень много, выбросов тоже много: коробки сплюснуты, и сравнить локации по такому графику невозможно.

Графики получше

# Locations chosen for the side-by-side comparison.
locations <- c("Albury", "BadgerysCreek", "Melbourne", "Walpole", "Cairns")
# locations <- c("Walpole")
# Fix the RNG seed so the random 1% subsample is identical on every run.
set.seed(42)
# Keep only the two columns used by the plot, drop rows with missing values,
# restrict to the chosen locations, then take a random 1% of what is left.
weather_b <- weather %>%
  select(Rainfall, Location) %>%
  drop_na() %>%
  filter(Location %in% locations) %>%
  sample_frac(0.01)
  • Уберём выбросы по правилу 1,5·IQR отдельно для каждой локации
# Per location, keep only rainfall values lying between
# Q1 - 1.5 * IQR and Q3 + 1.5 * IQR (both bounds inclusive).
weather_c <- weather_b %>%
  group_by(Location) %>%
  filter(
    between(
      Rainfall,
      quantile(Rainfall, 0.25) - 1.5 * IQR(Rainfall),
      quantile(Rainfall, 0.75) + 1.5 * IQR(Rainfall)
    )
  ) %>%
  ungroup()

# cleaned_data <- weather_b %>%
#   filter(!(abs(Rainfall - median(Rainfall)) > 2*sd(Rainfall)))
# Box plot of the filtered rainfall per location with the individual
# observations overlaid as jittered points, coloured by location.
gr_weather <- ggplot(
  data = weather_c,
  aes(x = Location, y = Rainfall, color = Location)
) +
  # Hide the box plot's own outlier markers: geom_jitter() below already
  # draws every observation, so outliers would otherwise be plotted twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.25) +
  xlab("Место")

gr_weather