library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom 1.0.6 ✔ rsample 1.2.1
## ✔ dials 1.3.0 ✔ tune 1.2.1
## ✔ infer 1.0.7 ✔ workflows 1.1.4
## ✔ modeldata 1.4.0 ✔ workflowsets 1.1.0
## ✔ parsnip 1.2.1 ✔ yardstick 1.3.1
## ✔ recipes 1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(correlationfunnel)
## ══ Using correlationfunnel? ════════════════════════════════════════════════════
## You might also be interested in applied data science training for business.
## </> Learn more at - www.business-science.io </>
library(tidytext)
library(usemodels)
library(textrecipes)
# Download and parse the Tidy Tuesday (2022-09-13) Bigfoot reports CSV.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-09-13/bigfoot.csv"
)
## Rows: 5021 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): observed, location_details, county, state, season, title, classif...
## dbl (17): latitude, longitude, number, temperature_high, temperature_mid, t...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Summarise every column (type, missingness, distribution) to guide cleaning.
# Called directly rather than through a pipe so the summary reports the
# dataset under its own name.
skimr::skim(data)
| Name | data |
| Number of rows | 5021 |
| Number of columns | 28 |
| _______________________ | |
| Column type frequency: | |
| character | 10 |
| Date | 1 |
| numeric | 17 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| observed | 38 | 0.99 | 1 | 30374 | 0 | 4982 | 0 |
| location_details | 758 | 0.85 | 1 | 3876 | 0 | 4196 | 0 |
| county | 0 | 1.00 | 10 | 30 | 0 | 1037 | 0 |
| state | 0 | 1.00 | 4 | 14 | 0 | 49 | 0 |
| season | 0 | 1.00 | 4 | 7 | 0 | 5 | 0 |
| title | 976 | 0.81 | 23 | 235 | 0 | 4045 | 0 |
| classification | 0 | 1.00 | 7 | 7 | 0 | 3 | 0 |
| geohash | 976 | 0.81 | 10 | 10 | 0 | 4001 | 0 |
| precip_type | 3298 | 0.34 | 4 | 4 | 0 | 2 | 0 |
| summary | 1655 | 0.67 | 15 | 103 | 0 | 321 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 976 | 0.81 | 1869-11-10 | 2021-11-27 | 2003-11-16 | 3111 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| latitude | 976 | 0.81 | 39.36 | 5.68 | 25.14 | 35.35 | 39.30 | 43.93 | 64.89 | ▂▇▆▁▁ |
| longitude | 976 | 0.81 | -97.42 | 16.73 | -167.13 | -117.06 | -91.77 | -83.07 | -68.23 | ▁▁▆▆▇ |
| number | 0 | 1.00 | 21520.23 | 19259.15 | 60.00 | 4595.00 | 15473.00 | 33979.00 | 71997.00 | ▇▃▂▂▁ |
| temperature_high | 1683 | 0.66 | 67.12 | 17.78 | -0.62 | 55.14 | 69.97 | 81.10 | 106.51 | ▁▂▅▇▃ |
| temperature_mid | 1835 | 0.63 | 57.84 | 16.40 | -8.46 | 46.77 | 59.36 | 70.38 | 94.03 | ▁▁▆▇▃ |
| temperature_low | 1832 | 0.64 | 48.64 | 15.94 | -22.78 | 37.50 | 49.40 | 60.66 | 84.34 | ▁▁▅▇▃ |
| dew_point | 1648 | 0.67 | 46.23 | 16.44 | -11.21 | 34.77 | 46.69 | 59.00 | 77.40 | ▁▂▆▇▅ |
| humidity | 1648 | 0.67 | 0.71 | 0.16 | 0.08 | 0.62 | 0.73 | 0.82 | 1.00 | ▁▁▃▇▅ |
| cloud_cover | 1937 | 0.61 | 0.44 | 0.33 | 0.00 | 0.12 | 0.40 | 0.73 | 1.00 | ▇▅▃▃▅ |
| moon_phase | 1625 | 0.68 | 0.50 | 0.29 | 0.00 | 0.25 | 0.49 | 0.75 | 1.00 | ▇▇▇▇▇ |
| precip_intensity | 2309 | 0.54 | 0.01 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 2.07 | ▇▁▁▁▁ |
| precip_probability | 2311 | 0.54 | 0.30 | 0.42 | 0.00 | 0.00 | 0.00 | 0.73 | 1.00 | ▇▁▁▁▃ |
| pressure | 2402 | 0.52 | 1017.08 | 6.14 | 980.34 | 1013.42 | 1016.96 | 1020.64 | 1042.41 | ▁▁▇▆▁ |
| uv_index | 1629 | 0.68 | 5.16 | 3.14 | 0.00 | 3.00 | 5.00 | 8.00 | 13.00 | ▆▇▅▆▁ |
| visibility | 1972 | 0.61 | 8.49 | 2.06 | 0.74 | 7.66 | 9.45 | 10.00 | 10.00 | ▁▁▁▂▇ |
| wind_bearing | 1634 | 0.67 | 196.57 | 96.38 | 0.00 | 128.00 | 203.00 | 273.00 | 359.00 | ▅▅▇▇▆ |
| wind_speed | 1632 | 0.67 | 3.87 | 3.28 | 0.00 | 1.34 | 2.93 | 5.56 | 23.94 | ▇▃▁▁▁ |
# Build the modelling table:
#   1. Drop the precipitation columns first; they are the sparsest columns in
#      the skim summary (precip_type is only ~34% complete), so keeping them
#      would make na.omit() discard most rows.
#   2. Keep only rows that are complete in every remaining column.
#   3. Drop the rare third level ("Class C").
#   4. Remove the date, free-text and geohash columns.
#   5. Relabel the two remaining classes.
data_clean <- data %>%
  select(-c(precip_type, precip_intensity, precip_probability)) %>%
  na.omit() %>%
  # No separate NA check on `observed` is needed: it is still a column when
  # na.omit() runs, so rows where it is missing are already gone.
  filter(classification != "Class C") %>%
  select(-c(date, location_details, title, summary, observed, geohash)) %>%
  mutate(
    classification = case_when(
      classification == "Class A" ~ "sighting",
      classification == "Class B" ~ "possible"
    )
  )
# Check how many rows fall into each outcome class after cleaning.
count(data_clean, classification)
## # A tibble: 2 × 2
## classification n
## <chr> <int>
## 1 possible 1053
## 2 sighting 1019
# Bar chart: number of rows in each outcome class.
ggplot(data_clean, aes(x = classification)) +
  geom_bar()

# Box plot: distribution of `temperature_high` within each outcome class.
ggplot(data_clean, aes(x = classification, y = temperature_high)) +
  geom_boxplot()
# Step 1: drop `number`, then binarize every remaining column. As the glimpse
# shows, numeric columns are split into bins and categorical columns are
# one-hot encoded, with infrequent levels pooled into "-OTHER".
data_binarized <- binarize(select(data_clean, -number))

glimpse(data_binarized)
## Rows: 2,072
## Columns: 97
## $ county__Jackson_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Jefferson_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__King_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Pierce_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Snohomish_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Washington_County <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `county__-OTHER` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ state__Alabama <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Arkansas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__California <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Colorado <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Florida <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Georgia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Idaho <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Illinois <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Indiana <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Iowa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Kansas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Kentucky <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Michigan <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Missouri <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__New_Jersey <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ state__New_York <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__North_Carolina <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0…
## $ state__Ohio <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Oklahoma <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Oregon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Pennsylvania <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Tennessee <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Texas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Virginia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Washington <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__West_Virginia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Wisconsin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `state__-OTHER` <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0…
## $ season__Fall <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ season__Spring <dbl> 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ season__Summer <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1…
## $ season__Unknown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ season__Winter <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `latitude__-Inf_35.298325` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ latitude__35.298325_39.642495 <dbl> 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0…
## $ latitude__39.642495_43.46018 <dbl> 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1…
## $ latitude__43.46018_Inf <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0…
## $ `longitude__-Inf_-112.1051` <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0…
## $ `longitude__-112.1051_-88.748825` <dbl> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ `longitude__-88.748825_-82.1174575` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0…
## $ `longitude__-82.1174575_Inf` <dbl> 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1…
## $ classification__possible <dbl> 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1…
## $ classification__sighting <dbl> 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0…
## $ `temperature_high__-Inf_54.65` <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1…
## $ temperature_high__54.65_69.905 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ temperature_high__69.905_81.2625 <dbl> 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0…
## $ temperature_high__81.2625_Inf <dbl> 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0…
## $ `temperature_mid__-Inf_46.7925` <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1…
## $ temperature_mid__46.7925_59.7775 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ temperature_mid__59.7775_70.86125 <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0…
## $ temperature_mid__70.86125_Inf <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ `temperature_low__-Inf_38.04` <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0…
## $ temperature_low__38.04_49.94 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1…
## $ temperature_low__49.94_61.4425 <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ temperature_low__61.4425_Inf <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ `dew_point__-Inf_35.5475` <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0…
## $ dew_point__35.5475_47.51 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ dew_point__47.51_59.6225 <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dew_point__59.6225_Inf <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ `humidity__-Inf_0.64` <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0…
## $ humidity__0.64_0.74 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ humidity__0.74_0.82 <dbl> 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0…
## $ humidity__0.82_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ `cloud_cover__-Inf_0.13` <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0…
## $ cloud_cover__0.13_0.41 <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0…
## $ cloud_cover__0.41_0.74 <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ cloud_cover__0.74_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ `moon_phase__-Inf_0.25` <dbl> 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0…
## $ moon_phase__0.25_0.51 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ moon_phase__0.51_0.75 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1…
## $ moon_phase__0.75_Inf <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ `pressure__-Inf_1013.32` <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0…
## $ pressure__1013.32_1016.935 <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0…
## $ pressure__1016.935_1020.65 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ pressure__1020.65_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1…
## $ `uv_index__-Inf_3` <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1…
## $ uv_index__3_5 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ uv_index__5_8 <dbl> 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0…
## $ uv_index__8_Inf <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0…
## $ `visibility__-Inf_7.63` <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0…
## $ visibility__7.63_9.4105 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
## $ visibility__9.4105_Inf <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0…
## $ `wind_bearing__-Inf_127` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ wind_bearing__127_202 <dbl> 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ wind_bearing__202_268 <dbl> 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0…
## $ wind_bearing__268_Inf <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ `wind_speed__-Inf_1.42` <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0…
## $ wind_speed__1.42_2.97 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ wind_speed__2.97_5.4925 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ wind_speed__5.4925_Inf <dbl> 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
# Inspect the binarized table (same columns and row count as printed earlier).
glimpse(data_binarized)
## Rows: 2,072
## Columns: 97
## $ county__Jackson_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Jefferson_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__King_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Pierce_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Snohomish_County <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county__Washington_County <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `county__-OTHER` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ state__Alabama <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Arkansas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__California <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Colorado <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Florida <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Georgia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Idaho <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Illinois <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Indiana <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Iowa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Kansas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Kentucky <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Michigan <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Missouri <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__New_Jersey <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ state__New_York <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__North_Carolina <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0…
## $ state__Ohio <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Oklahoma <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Oregon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Pennsylvania <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Tennessee <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Texas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Virginia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Washington <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__West_Virginia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ state__Wisconsin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `state__-OTHER` <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0…
## $ season__Fall <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ season__Spring <dbl> 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0…
## $ season__Summer <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1…
## $ season__Unknown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ season__Winter <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `latitude__-Inf_35.298325` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ latitude__35.298325_39.642495 <dbl> 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0…
## $ latitude__39.642495_43.46018 <dbl> 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1…
## $ latitude__43.46018_Inf <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0…
## $ `longitude__-Inf_-112.1051` <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0…
## $ `longitude__-112.1051_-88.748825` <dbl> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0…
## $ `longitude__-88.748825_-82.1174575` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0…
## $ `longitude__-82.1174575_Inf` <dbl> 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1…
## $ classification__possible <dbl> 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1…
## $ classification__sighting <dbl> 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0…
## $ `temperature_high__-Inf_54.65` <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1…
## $ temperature_high__54.65_69.905 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ temperature_high__69.905_81.2625 <dbl> 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0…
## $ temperature_high__81.2625_Inf <dbl> 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0…
## $ `temperature_mid__-Inf_46.7925` <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1…
## $ temperature_mid__46.7925_59.7775 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ temperature_mid__59.7775_70.86125 <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0…
## $ temperature_mid__70.86125_Inf <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ `temperature_low__-Inf_38.04` <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0…
## $ temperature_low__38.04_49.94 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1…
## $ temperature_low__49.94_61.4425 <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ temperature_low__61.4425_Inf <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ `dew_point__-Inf_35.5475` <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0…
## $ dew_point__35.5475_47.51 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ dew_point__47.51_59.6225 <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dew_point__59.6225_Inf <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ `humidity__-Inf_0.64` <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0…
## $ humidity__0.64_0.74 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ humidity__0.74_0.82 <dbl> 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0…
## $ humidity__0.82_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ `cloud_cover__-Inf_0.13` <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0…
## $ cloud_cover__0.13_0.41 <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0…
## $ cloud_cover__0.41_0.74 <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
## $ cloud_cover__0.74_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ `moon_phase__-Inf_0.25` <dbl> 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0…
## $ moon_phase__0.25_0.51 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ moon_phase__0.51_0.75 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1…
## $ moon_phase__0.75_Inf <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ `pressure__-Inf_1013.32` <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0…
## $ pressure__1013.32_1016.935 <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0…
## $ pressure__1016.935_1020.65 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ pressure__1020.65_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1…
## $ `uv_index__-Inf_3` <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1…
## $ uv_index__3_5 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ uv_index__5_8 <dbl> 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0…
## $ uv_index__8_Inf <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0…
## $ `visibility__-Inf_7.63` <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0…
## $ visibility__7.63_9.4105 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
## $ visibility__9.4105_Inf <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0…
## $ `wind_bearing__-Inf_127` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ wind_bearing__127_202 <dbl> 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0…
## $ wind_bearing__202_268 <dbl> 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0…
## $ wind_bearing__268_Inf <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ `wind_speed__-Inf_1.42` <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0…
## $ wind_speed__1.42_2.97 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ wind_speed__2.97_5.4925 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ wind_speed__5.4925_Inf <dbl> 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
# Step 2: correlate every binary feature with the target column
# `classification__sighting`, then print the resulting table.
data_correlation <- correlate(data_binarized, classification__sighting)

data_correlation
## # A tibble: 97 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 classification possible -1
## 2 classification sighting 1
## 3 wind_speed -Inf_1.42 -0.0917
## 4 longitude -112.1051_-88.748825 0.0741
## 5 wind_speed 5.4925_Inf 0.0697
## 6 longitude -Inf_-112.1051 -0.0686
## 7 state California -0.0677
## 8 wind_bearing -Inf_127 0.0640
## 9 state Alabama 0.0598
## 10 dew_point 35.5475_47.51 -0.0573
## # ℹ 87 more rows
# Step 3: draw the correlation funnel from the feature/bin correlations.
correlationfunnel::plot_correlation_funnel(data_correlation)
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
library(tidymodels)
# Seed the RNG so the train/test split and the CV folds are reproducible:
# both initial_split() and vfold_cv() assign rows at random.
set.seed(1234)
# Optional: uncomment to iterate quickly on a small random subset.
# data_clean <- data_clean %>% sample_n(100)

# Hold out a test set (rsample's default proportion), then build
# cross-validation folds (rsample's default number of folds) from the
# training portion only, so the test set stays untouched during resampling.
data_split <- initial_split(data_clean)
data_train <- training(data_split)
data_test <- testing(data_split)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [1398/156]> Fold01
## 2 <split [1398/156]> Fold02
## 3 <split [1398/156]> Fold03
## 4 <split [1398/156]> Fold04
## 5 <split [1399/155]> Fold05
## 6 <split [1399/155]> Fold06
## 7 <split [1399/155]> Fold07
## 8 <split [1399/155]> Fold08
## 9 <split [1399/155]> Fold09
## 10 <split [1399/155]> Fold10
# Preprocessing recipe for predicting `classification` from all other columns.
#   * `number` is given the "ID" role, so it is carried along but is not a
#     predictor and must be left untransformed.
#   * Nominal predictors are dummy-encoded.
#   * Selected numeric predictors get a Yeo-Johnson transform.
#   * All numeric predictors (dummies included) are centred and scaled, then
#     reduced with PCA, keeping enough components to reach the 0.75 variance
#     threshold.
xgboost_rec <- recipes::recipe(classification ~ ., data = data_train) %>%
  update_role(number, new_role = "ID") %>%
  step_dummy(all_nominal_predictors()) %>%
  # `number` is deliberately not listed: naming a column explicitly applies
  # the step regardless of its role, which would distort the ID values.
  step_YeoJohnson(longitude, humidity, visibility, wind_speed) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_pca(all_numeric_predictors(), threshold = .75)
# Estimate the recipe on the training data and inspect the processed training
# set. bake(new_data = NULL) returns the same result as the superseded juice().
xgboost_rec %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 1,554
## Columns: 446
## $ number <dbl> 120.75502, 130.40188, 168.73455, 119.62538, 166.84814, …
## $ classification <fct> sighting, possible, possible, possible, possible, sight…
## $ PC001 <dbl> -1.1782117, 1.0566478, 1.5112077, -0.6498679, -3.471479…
## $ PC002 <dbl> 0.8082876, 1.5821371, 3.5100633, 1.6672201, -0.4477700,…
## $ PC003 <dbl> -3.6281167059, 0.5795528362, 2.9809548300, -3.580495468…
## $ PC004 <dbl> 1.18872694, -2.19981078, -0.97198941, 0.16755085, 1.112…
## $ PC005 <dbl> 0.01157484, 1.61060114, -0.43412363, 0.32196992, 0.8551…
## $ PC006 <dbl> -1.03109289, -2.51496547, 1.87613206, -0.41714477, -0.2…
## $ PC007 <dbl> -1.01693175, 2.15319437, -2.07784129, -0.37038343, -1.4…
## $ PC008 <dbl> -0.49974316, -1.59737223, -1.87469536, -0.09309599, 0.0…
## $ PC009 <dbl> -0.6430447, 1.5724413, -2.6045766, -1.1686868, -1.00004…
## $ PC010 <dbl> -0.138181943, -2.095605882, 6.317381579, 0.237203283, 0…
## $ PC011 <dbl> -0.001920390, 1.336478296, -6.817327757, 0.197754796, -…
## $ PC012 <dbl> 0.059279809, 1.177824128, -0.534747159, 0.184446007, 0.…
## $ PC013 <dbl> -0.14566606, 0.31930765, 11.47716064, -0.01058192, -0.1…
## $ PC014 <dbl> 0.39041150, 1.54693786, 5.84997009, 0.45344582, 0.33257…
## $ PC015 <dbl> 0.054545166, 1.691180318, 7.852837503, -0.009946727, 0.…
## $ PC016 <dbl> 0.05533530, -0.04563165, -4.93355625, 0.06385475, 0.087…
## $ PC017 <dbl> 0.148604899, 2.209315558, 2.524084214, 0.104080032, 0.3…
## $ PC018 <dbl> -0.07505821, -0.61086027, -0.44929026, 0.11730644, -0.1…
## $ PC019 <dbl> -0.096856666, -1.215803333, 0.304577797, -0.141038919, …
## $ PC020 <dbl> -0.16952120, 0.86803524, 4.90473023, -0.25932710, -0.25…
## $ PC021 <dbl> -0.55042230, 2.03252544, -3.24424236, -0.46377890, -0.2…
## $ PC022 <dbl> 0.337347229, 1.317070225, -0.084181417, 0.042821756, 0.…
## $ PC023 <dbl> 0.19439007, -0.97953951, -0.40897602, 0.01131004, 0.052…
## $ PC024 <dbl> -0.17639723, -0.78983710, 2.79426522, -0.16291426, -0.2…
## $ PC025 <dbl> -0.152757233, -0.568505730, 2.146103906, -0.107360340, …
## $ PC026 <dbl> -0.26782860, -0.17478505, 0.94690125, -0.17523564, -0.3…
## $ PC027 <dbl> -0.153593645, 0.496621454, 0.813469990, -0.262807773, -…
## $ PC028 <dbl> -0.07203111, -0.04486998, 0.98827563, -0.08247323, -0.3…
## $ PC029 <dbl> 0.268790278, -0.021231484, -0.208806248, 0.187109569, 0…
## $ PC030 <dbl> -0.075093605, 0.203480457, -0.718575684, -0.307736621, …
## $ PC031 <dbl> -0.23044571, -0.49390697, -0.31440086, -0.29705591, -0.…
## $ PC032 <dbl> 0.02213031, -0.70668644, 0.44432751, 0.03696639, -0.062…
## $ PC033 <dbl> -0.02474626, 0.31193001, -0.23959437, 0.28898674, 0.293…
## $ PC034 <dbl> -0.100208653, 0.704149241, 0.663143908, -0.001475734, -…
## $ PC035 <dbl> -0.41074285, -0.54581523, -1.11260036, -0.35198738, -0.…
## $ PC036 <dbl> 0.135453034, -0.512279062, -0.600869983, 0.311168069, 0…
## $ PC037 <dbl> -0.37849044, -0.14009954, -0.02597591, -0.49673620, -0.…
## $ PC038 <dbl> 0.03123226, -0.07720410, 1.24256702, 0.10438820, -0.033…
## $ PC039 <dbl> -0.23210490, -0.05316535, -0.22110127, -0.20817566, -0.…
## $ PC040 <dbl> 0.06537634, 0.41928338, 1.41044550, 0.13518248, -0.0900…
## $ PC041 <dbl> 0.06016963, -0.32303040, 0.88699175, 0.08617660, 0.0343…
## $ PC042 <dbl> 0.02644978, 0.37071581, 0.39916604, -0.28369101, -0.112…
## $ PC043 <dbl> 0.04645658, 0.35191241, -0.74207216, 0.01655839, -0.196…
## $ PC044 <dbl> 0.549046740, 0.852448848, -0.971729823, 0.137505884, -0…
## $ PC045 <dbl> -0.4169176, -0.5643996, 1.5440835, 0.1378845, 0.1220122…
## $ PC046 <dbl> -0.06834800, 1.31384710, -2.41003606, -0.80649736, -0.3…
## $ PC047 <dbl> -0.52682312, -0.28350883, -0.63084035, -0.39053615, 0.1…
## $ PC048 <dbl> -0.134351335, -0.351269409, 0.146134169, -0.005121718, …
## $ PC049 <dbl> -0.65683842, -0.98542097, -0.14070297, -0.44072914, 0.6…
## $ PC050 <dbl> -0.2728304, -1.4260914, -0.3617300, -0.2627399, 1.11000…
## $ PC051 <dbl> -1.0354968, -1.2292462, -0.7254983, 1.4883345, -0.25034…
## $ PC052 <dbl> -0.111103681, 0.308774629, -0.346813686, -0.014828753, …
## $ PC053 <dbl> -0.26686599, -1.22002607, 0.72727723, 0.71566062, 0.403…
## $ PC054 <dbl> 1.0628427, -0.8532219, 0.2619691, 0.8379334, 0.2574674,…
## $ PC055 <dbl> 0.06471345, -0.51590266, 1.88834852, 0.36873213, 0.7402…
## $ PC056 <dbl> 0.67225272, 0.24716920, 2.02357037, 0.84621830, 0.71023…
## $ PC057 <dbl> 1.2052207, -0.1461956, -1.8396807, 0.2455131, 1.2021103…
## $ PC058 <dbl> 0.24819864, -1.92922719, -0.31507220, -0.48667926, 0.81…
## $ PC059 <dbl> 0.19516259, 0.48888258, 1.66508121, 1.27555637, 1.54516…
## $ PC060 <dbl> 0.08405527, -0.22398215, 0.40396747, -0.26056350, -0.59…
## $ PC061 <dbl> 0.117397852, -0.897939676, -0.774037466, -0.400782854, …
## $ PC062 <dbl> -1.16586833, -1.78845802, -0.33065844, 1.07998213, 0.53…
## $ PC063 <dbl> 6.04451714, -0.43977268, -0.11904071, -0.27872955, -0.7…
## $ PC064 <dbl> -1.22862791, -0.12709165, -0.20874562, 0.37053037, 0.53…
## $ PC065 <dbl> 0.99280788, 0.14182619, 0.30266791, -0.58542873, -1.328…
## $ PC066 <dbl> 0.42720435, 0.26946049, 0.35207384, 0.39117618, -0.9434…
## $ PC067 <dbl> 0.34827276, -0.61190699, 0.09392997, -0.80800600, -0.41…
## $ PC068 <dbl> 0.42553987, -0.09053114, -0.60730043, -0.91179904, 0.40…
## $ PC069 <dbl> 0.4047318462, -0.2586270754, -0.7716921008, -0.20337946…
## $ PC070 <dbl> -0.1369133846, -0.1984831679, 0.5775421120, -0.38260596…
## $ PC071 <dbl> -0.40166200, -0.34384276, 0.21664639, 1.18908084, 0.998…
## $ PC072 <dbl> 0.050282601, 0.027211763, -0.189591662, -0.501285391, 0…
## $ PC073 <dbl> 0.124839745, 0.111912530, -0.138403201, -0.391682511, -…
## $ PC074 <dbl> -0.14391287, -0.13216064, 0.27698682, -0.18000421, 0.33…
## $ PC075 <dbl> -0.151653461, 0.007305476, -0.124059397, 0.361553161, 0…
## $ PC076 <dbl> -0.24964179, 0.25773206, 0.12093798, 0.66010961, 0.5083…
## $ PC077 <dbl> 0.08098210, -0.15365750, -0.03955820, 0.05274046, -0.21…
## $ PC078 <dbl> 0.049065008, 0.695470867, 0.221261857, -1.075890881, 0.…
## $ PC079 <dbl> 0.033885189, 0.149551188, 0.110897534, -0.025573643, -0…
## $ PC080 <dbl> 0.112034417, -0.551456450, -0.307182372, 0.124825224, -…
## $ PC081 <dbl> -0.067034451, 0.289454251, 0.157355865, 0.109138224, 0.…
## $ PC082 <dbl> 0.118177999, 0.032752081, -0.025354542, -0.734545439, -…
## $ PC083 <dbl> -0.04006708, -0.22012711, 0.08961337, 0.57530104, 0.240…
## $ PC084 <dbl> 0.02648743, -0.32263845, 0.05220229, -0.34680517, 0.052…
## $ PC085 <dbl> 0.0388407749, -0.1302709324, 0.0177546031, 0.0153542147…
## $ PC086 <dbl> -0.012525757, 0.320409543, -0.075759382, -0.250017846, …
## $ PC087 <dbl> 0.1317017624, 0.4529150669, -0.0076706503, -0.642614006…
## $ PC088 <dbl> 0.015200231, -0.100585122, 0.097429313, 0.049479217, -0…
## $ PC089 <dbl> -0.028872740, -0.301709167, 0.132020788, 1.015555358, 0…
## $ PC090 <dbl> 0.009295313, -0.350925586, 0.022680899, 0.147365136, 0.…
## $ PC091 <dbl> 0.040118753, 0.200825630, -0.058020508, -0.248538224, -…
## $ PC092 <dbl> -0.001779097, -0.067928923, -0.129851368, 0.297352798, …
## $ PC093 <dbl> -0.007409048, -0.226581629, -0.087315769, 0.869338809, …
## $ PC094 <dbl> 0.007242279, -0.051553298, -0.258230224, 0.201152483, -…
## $ PC095 <dbl> -0.02625024, -0.36404860, 0.34302763, 0.54444689, 0.307…
## $ PC096 <dbl> -0.01738217, 0.38653756, 0.12486177, -0.05201923, 0.080…
## $ PC097 <dbl> -0.005209655, 0.067104801, -0.438352290, -0.375387189, …
## $ PC098 <dbl> -0.03657649, 0.17696969, -0.07145621, -0.62650808, -0.0…
## $ PC099 <dbl> 0.01808599, 0.24546316, 0.02176352, -0.64378796, -0.626…
## $ PC100 <dbl> 0.130157201, -0.050899665, -0.235267710, -0.479033770, …
## $ PC101 <dbl> -0.03587941, 0.16442513, -0.11355426, 0.65632862, 1.084…
## $ PC102 <dbl> -0.078350826, -0.055132159, -0.022587681, -0.034449372,…
## $ PC103 <dbl> 0.01162045, -0.05562929, 0.04068569, 0.64924900, -0.049…
## $ PC104 <dbl> -0.01583167, -0.56792854, -0.24534404, -0.09547160, 0.6…
## $ PC105 <dbl> -0.021644029, 0.247982442, 0.085005078, 0.884502665, 0.…
## $ PC106 <dbl> -0.003199351, 0.109736306, -0.117604240, -0.128746797, …
## $ PC107 <dbl> 0.133760477, 0.308324647, -0.152899611, -3.470741780, -…
## $ PC108 <dbl> -0.035012117, 0.056283261, 0.192374939, 1.518588442, 0.…
## $ PC109 <dbl> 0.0039121819, 0.0082291637, 0.1540239257, 0.5251662974,…
## $ PC110 <dbl> 0.050827937, 0.204311905, 0.089043022, -1.871429548, -0…
## $ PC111 <dbl> -0.064190533, -0.255198694, 0.023385873, 1.628614167, 1…
## $ PC112 <dbl> -0.049210245, -0.062326170, 0.146903519, 1.368847950, 0…
## $ PC113 <dbl> 0.063036968, -0.121741035, -0.198811646, -2.570081869, …
## $ PC114 <dbl> -0.011495415, 0.205206951, 0.060166129, 0.340580831, 0.…
## $ PC115 <dbl> 0.039470079, -1.219429792, -0.111477743, -2.360600167, …
## $ PC116 <dbl> -0.022952913, 0.553228565, 0.036406205, 1.534760032, 0.…
## $ PC117 <dbl> -0.0955013792, -0.2163897074, -0.0281346252, 4.43676916…
## $ PC118 <dbl> 0.02311319, 0.15181947, 0.60872900, 0.75677279, -0.0924…
## $ PC119 <dbl> -0.029494019, -1.214226995, -0.072989271, 3.118691587, …
## $ PC120 <dbl> 0.0420414280, -0.0446911558, -0.0433149295, -1.66397318…
## $ PC121 <dbl> -0.006656182, 0.639223858, -0.049948716, 0.949270934, -…
## $ PC122 <dbl> -0.001207122, -0.333641595, 0.039838444, 0.383587936, 0…
## $ PC123 <dbl> -0.065490823, 0.373516598, 0.296751364, 3.178601524, 1.…
## $ PC124 <dbl> -0.001512480, 0.005504913, -0.108405391, 2.581232449, 0…
## $ PC125 <dbl> -0.008848391, -0.035840393, -0.011342032, -1.321117947,…
## $ PC126 <dbl> -0.004846318, -0.103396820, -0.062813226, -1.719131305,…
## $ PC127 <dbl> 0.0005853877, 0.0623490105, 0.1218630653, 2.1461616468,…
## $ PC128 <dbl> 0.013509719, -0.410109546, -0.038259305, 1.473683828, -…
## $ PC129 <dbl> 0.006549437, -0.345862308, -0.272236775, 7.091225804, 0…
## $ PC130 <dbl> -0.0009589146, 0.1508325164, -0.2764207922, 4.615453071…
## $ PC131 <dbl> -0.0398724428, -0.1853821666, 0.3129548144, -3.97370237…
## $ PC132 <dbl> -0.020858626, 0.551380778, -0.130038049, 4.730653552, 0…
## $ PC133 <dbl> 0.002108776, 0.519235335, 0.439595180, 3.826704533, 0.4…
## $ PC134 <dbl> 0.0009653117, 0.2480305189, -0.3115996765, 2.3743981908…
## $ PC135 <dbl> 0.009409829, 0.054433361, -0.281622055, 4.964155107, -2…
## $ PC136 <dbl> -0.007652736, 0.237673553, -0.299525980, 2.088193832, -…
## $ PC137 <dbl> 0.005508404, 0.361214302, -0.128265803, 2.058158180, -1…
## $ PC138 <dbl> -0.009426309, 0.181035880, 0.248244000, -1.516047231, 0…
## $ PC139 <dbl> -0.017463124, -0.028054790, 0.518901030, -0.770369657, …
## $ PC140 <dbl> -0.002190932, -0.310087157, -0.339626056, 0.912604379, …
## $ PC141 <dbl> -0.01462934, 0.25410173, -0.07197596, -0.89466466, 0.39…
## $ PC142 <dbl> -0.027298291, 0.252470228, 0.147153684, 0.533977684, 0.…
## $ PC143 <dbl> -0.081576952, 0.183902459, 0.044971140, -2.951355387, 1…
## $ PC144 <dbl> 0.006091719, -0.590749429, -0.499292365, -0.257897284, …
## $ PC145 <dbl> 0.0225544259, -0.4730229277, -0.1484135386, 1.047488890…
## $ PC146 <dbl> 0.005606635, -0.171795438, -0.055920507, 0.157761773, 0…
## $ PC147 <dbl> -0.0026799993, 0.1943058733, 0.1810209036, -0.045250401…
## $ PC148 <dbl> 0.007395822, -0.510923454, -0.036782834, 0.231367707, 3…
## $ PC149 <dbl> 0.0013635622, 0.1024397925, -0.1582218131, 0.2313576313…
## $ PC150 <dbl> 0.009220915, -0.414749061, -0.157009147, 0.301948899, -…
## $ PC151 <dbl> -0.003151287, -0.094947530, 0.052249570, 0.228593476, 0…
## $ PC152 <dbl> 0.009719887, 0.130645464, -0.275134982, 0.500979995, 1.…
## $ PC153 <dbl> 0.001824192, -0.477127722, -0.489455209, -0.359261781, …
## $ PC154 <dbl> -5.260614e-03, 2.817254e-01, 7.734351e-03, 7.405531e-01…
## $ PC155 <dbl> 0.0001983975, 0.1800033870, -0.0528821546, -0.403765046…
## $ PC156 <dbl> 0.009994433, 0.296214626, -0.573868301, -0.266265429, -…
## $ PC157 <dbl> 0.003852707, -0.289961973, -0.077868979, 0.262789425, -…
## $ PC158 <dbl> 0.017786762, -0.126965440, -0.230078372, 0.001752409, 1…
## $ PC159 <dbl> 0.0005006848, -0.0200531931, 0.3899540605, -0.055225454…
## $ PC160 <dbl> -0.007710249, -1.096447470, 0.197089158, 0.044540516, -…
## $ PC161 <dbl> -0.011983893, -0.162160503, 0.208223809, -0.484638218, …
## $ PC162 <dbl> -0.007831744, 0.909810223, -0.072751336, 0.863896073, 2…
## $ PC163 <dbl> -0.0274344775, -0.7052843751, 0.2533509582, -0.47921053…
## $ PC164 <dbl> -0.013085808, 0.665660789, 0.379501263, -0.001444243, -…
## $ PC165 <dbl> 0.018942578, -0.764531176, -0.739943244, 0.441003389, 2…
## $ PC166 <dbl> -0.001081316, 0.003771023, 0.057557172, 0.034954457, -0…
## $ PC167 <dbl> -0.007329023, 0.034080832, 0.113702774, -0.178316400, -…
## $ PC168 <dbl> -0.002410476, 0.074295662, 0.182165854, -0.248054886, 0…
## $ PC169 <dbl> -0.03477926, 0.15620953, 0.40895070, -0.51041965, -2.25…
## $ PC170 <dbl> -0.010808093, 0.263164919, -0.326998732, 0.013181809, -…
## $ PC171 <dbl> 0.01990131, 0.04088698, 0.44598754, -0.02865767, 0.8782…
## $ PC172 <dbl> -0.017281772, 0.310144099, -0.173978170, 0.116274289, -…
## $ PC173 <dbl> 0.041144849, 0.473625327, -0.530825471, 0.976420278, 1.…
## $ PC174 <dbl> 0.011077095, 0.306594872, -0.372643247, 0.693078095, 0.…
## $ PC175 <dbl> -0.074062968, 0.016905126, -0.333793589, -1.007909667, …
## $ PC176 <dbl> 0.008957886, -0.065961751, -0.080226680, 0.222807071, -…
## $ PC177 <dbl> -4.801772e-13, -4.655201e-12, 1.219347e-11, -4.383818e-…
## $ PC178 <dbl> 1.035727e-12, 3.761708e-11, -9.444938e-11, 2.953033e-11…
## $ PC179 <dbl> 9.625988e-13, -8.318033e-13, -7.528708e-13, 1.129544e-1…
## $ PC180 <dbl> -2.301175e-12, -7.345445e-11, 7.448518e-11, -4.330480e-…
## $ PC181 <dbl> -6.733781e-13, 5.441513e-11, -1.566678e-11, -4.968741e-…
## $ PC182 <dbl> -8.944890e-13, -1.068780e-11, 1.930200e-11, -1.717601e-…
## $ PC183 <dbl> 7.692213e-13, 8.315710e-11, -7.691856e-11, 1.484040e-11…
## $ PC184 <dbl> 9.196187e-13, 2.158604e-11, -2.090122e-11, 1.967394e-11…
## $ PC185 <dbl> 7.601899e-13, 2.786817e-11, -2.375648e-11, 1.410366e-11…
## $ PC186 <dbl> 9.776386e-15, 2.183460e-11, -6.951951e-12, -6.313638e-1…
## $ PC187 <dbl> 1.293592e-12, 7.828355e-11, -1.076028e-10, 4.892272e-11…
## $ PC188 <dbl> 0.0027175944, 0.1870791070, -0.2389785919, 0.0351344558…
## $ PC189 <dbl> 0.0019125823, 0.0261021330, 0.1169397085, 0.0581437548,…
## $ PC190 <dbl> 0.0048336448, -0.1182450112, 0.0355958429, -0.019223837…
## $ PC191 <dbl> 0.001695906, -0.541163389, -0.008828921, -0.067997264, …
## $ PC192 <dbl> -0.0101185614, -0.1503661505, 0.2777378000, -0.11037297…
## $ PC193 <dbl> -0.002979742, 0.353402052, -0.043597706, -0.032093294, …
## $ PC194 <dbl> -0.0000887438, 0.0432363458, 0.6278115092, -0.311169616…
## $ PC195 <dbl> -0.0090285730, -0.2084534088, 0.2332646471, -0.04757576…
## $ PC196 <dbl> 3.902372e-03, -5.458551e-01, -1.062683e-01, 5.859725e-0…
## $ PC197 <dbl> -0.0089607603, -0.0312830541, -0.0194499899, 0.00903662…
## $ PC198 <dbl> -0.0127895824, 0.4459738370, 0.2098579442, 0.0101024583…
## $ PC199 <dbl> 0.00526072, 0.78236971, 0.47037185, 0.17021765, -0.0393…
## $ PC200 <dbl> -0.0027287736, -0.4196192935, -0.2343781449, -0.0756188…
## $ PC201 <dbl> -0.002127028, 0.377958054, -0.263277239, 0.126033711, 0…
## $ PC202 <dbl> -0.005535218, 0.165078482, -0.021186909, 0.210399241, -…
## $ PC203 <dbl> 0.006225173, -0.549708756, 0.063772085, 0.035068848, 0.…
## $ PC204 <dbl> 0.001642222, -1.365765431, 0.319724036, 0.201314384, -0…
## $ PC205 <dbl> -0.0013572095, -0.2558193438, -0.7242554065, 0.07202482…
## $ PC206 <dbl> -0.004796366, -0.326290936, 0.979916441, -0.253429787, …
## $ PC207 <dbl> -0.0028914411, 1.7494444691, 0.0418889639, -0.055597667…
## $ PC208 <dbl> 0.002866645, -2.193133737, -0.262203143, -0.205208638, …
## $ PC209 <dbl> 0.004833549, -0.367593046, -0.123938634, -0.009411983, …
## $ PC210 <dbl> 0.004048847, -0.792548753, -1.306476636, 0.240327114, 0…
## $ PC211 <dbl> -0.0026366485, 0.2780549540, 0.4800352970, 0.1547293760…
## $ PC212 <dbl> 0.003565533, 1.105326750, 0.476560895, 0.021565431, 0.3…
## $ PC213 <dbl> -0.000265590, 0.860996290, 1.003031315, 0.157166969, 0.…
## $ PC214 <dbl> 0.015608339, 0.495158285, 0.250657616, 0.222402364, -0.…
## $ PC215 <dbl> 0.0006436328, -1.2889387924, 0.2191370028, -0.054355925…
## $ PC216 <dbl> 0.002037527, -0.265668045, -0.367671911, 0.010457920, -…
## $ PC217 <dbl> -0.003183836, -1.203520333, -0.507041486, 0.184697077, …
## $ PC218 <dbl> 0.004219977, 0.385071337, 0.069200103, -0.233889101, 0.…
## $ PC219 <dbl> -0.006588963, -1.864520340, -0.558681233, -0.113901226,…
## $ PC220 <dbl> -0.001265919, 0.834893396, -0.288799668, 0.137079808, 0…
## $ PC221 <dbl> 0.003297695, 0.553070015, 0.126517162, 0.158385852, -0.…
## $ PC222 <dbl> 0.0001297895, 0.0810176988, 0.3187879600, -0.0662039819…
## $ PC223 <dbl> -0.001411781, 1.723254626, -0.394759672, 0.255547510, 0…
## $ PC224 <dbl> -0.0031022591, -1.6581835755, -1.2196189053, 0.04237754…
## $ PC225 <dbl> 0.005234873, 0.489186258, -0.948963915, 0.392040536, 0.…
## $ PC226 <dbl> 0.0002689848, -0.4395683805, -0.7497862738, -0.15589689…
## $ PC227 <dbl> -0.002204202, -0.193503938, -0.508344834, -0.180462920,…
## $ PC228 <dbl> 0.003791765, -0.121848422, -0.531398794, 0.008484718, 0…
## $ PC229 <dbl> 0.009629828, -1.090445394, -0.350627039, -0.049937679, …
## $ PC230 <dbl> -0.006596595, 0.746806494, 0.397872821, -0.092868251, 0…
## $ PC231 <dbl> -0.0039564714, 0.8923443910, 0.9008615456, -0.002412030…
## $ PC232 <dbl> -0.0099936785, -1.3188544135, 0.1358971380, -0.09639014…
## $ PC233 <dbl> 0.010834297, -0.156552250, -1.657588033, 0.184780559, 0…
## $ PC234 <dbl> 4.544332e-02, -2.596731e+00, 9.307128e-01, 4.153693e-01…
## $ PC235 <dbl> -5.097396e-13, -3.308216e+00, 1.189449e+00, -2.734266e-…
## $ PC236 <dbl> -2.134543e-13, 3.418837e-01, -1.092200e+00, -4.653813e-…
## $ PC237 <dbl> -2.495728e-13, -4.358125e+00, 7.956980e-01, 2.477197e-1…
## $ PC238 <dbl> -2.475567e-15, -1.199187e+00, 2.017858e-01, -1.217328e-…
## $ PC239 <dbl> 1.236213e-13, -4.363646e+00, -1.212075e+00, 7.771784e-1…
## $ PC240 <dbl> 2.701972e-13, 2.713578e+00, -2.457444e+00, 4.329946e-12…
## $ PC241 <dbl> 7.102746e-13, 1.812087e+00, -7.120476e-01, 4.280854e-13…
## $ PC242 <dbl> -1.580948e-13, 4.504101e-01, -1.654994e+00, 1.433930e-1…
## $ PC243 <dbl> 4.064722e-13, -1.262279e+00, -1.092984e+00, 3.875732e-1…
## $ PC244 <dbl> -3.526041e-13, 8.994788e-01, 9.779800e-01, -8.268150e-1…
## $ PC245 <dbl> 1.243730e-13, -1.756316e-01, 1.432743e+00, 8.312477e-14…
## $ PC246 <dbl> -9.043248e-14, -3.541526e+00, 9.984431e-01, 3.926498e-1…
## $ PC247 <dbl> 2.127947e-14, 5.399782e+00, -6.769646e-01, -1.867612e-1…
## $ PC248 <dbl> 4.420261e-13, 4.448277e+00, 1.140487e+00, 4.998897e-12,…
## $ PC249 <dbl> -1.743935e-13, 2.562737e+00, -3.715780e-01, -2.269978e-…
## $ PC250 <dbl> -6.774601e-15, 5.372262e-01, 3.221396e+00, 8.369012e-13…
## $ PC251 <dbl> 8.733488e-14, 9.644833e-01, -3.282847e-01, -1.105628e-1…
## $ PC252 <dbl> 2.094744e-13, 2.291012e+00, -1.242089e+00, -1.266296e-1…
## $ PC253 <dbl> -7.772568e-14, 3.745651e+00, -4.215903e+00, 8.664184e-1…
## $ PC254 <dbl> -4.003649e-13, 2.698464e+00, -1.806444e+00, -2.658274e-…
## $ PC255 <dbl> -6.408471e-13, -9.329335e-01, -2.683803e+00, -3.400751e…
## $ PC256 <dbl> 3.183630e-13, -3.827894e-01, 2.207179e+00, 1.013435e-12…
## $ PC257 <dbl> 5.487286e-13, 1.157215e+00, 2.316718e+00, 2.941807e-12,…
## $ PC258 <dbl> -1.824274e-13, 1.635076e+00, 8.438208e-01, 5.720420e-12…
## $ PC259 <dbl> -4.601193e-13, -1.159458e+00, 2.870641e-01, -3.447091e-…
## $ PC260 <dbl> 3.042788e-13, 1.781702e+00, -2.721614e+00, -1.052039e-1…
## $ PC261 <dbl> 2.351293e-13, -2.384032e+00, -5.442862e+00, -4.111142e-…
## $ PC262 <dbl> -1.718365e-14, 2.571735e+00, -1.287015e+00, 3.662094e-1…
## $ PC263 <dbl> 2.977985e-14, 2.021474e+00, 5.376115e-01, 5.884688e-14,…
## $ PC264 <dbl> 1.738789e-13, -2.925011e+00, -3.314277e+00, 2.320374e-1…
## $ PC265 <dbl> 1.088463e-13, 2.920760e+00, -1.210785e+00, -2.250419e-1…
## $ PC266 <dbl> 5.725221e-13, 1.150488e+00, 8.609672e-01, 7.396702e-12,…
## $ PC267 <dbl> -4.566323e-13, 3.084175e+00, 1.054608e+00, -2.720738e-1…
## $ PC268 <dbl> 4.951431e-13, 2.722297e+00, 2.379938e+00, 3.712514e-12,…
## $ PC269 <dbl> 9.017731e-14, -7.398670e+00, -1.054852e+00, -1.424526e-…
## $ PC270 <dbl> -3.541477e-13, 6.010025e-01, -3.939164e-01, -3.679252e-…
## $ PC271 <dbl> 2.667421e-13, 1.827106e+00, -3.324207e+00, 7.451580e-13…
## $ PC272 <dbl> -2.308794e-13, -5.546391e+00, 2.371860e+00, -6.967429e-…
## $ PC273 <dbl> -2.955564e-13, -5.504672e+00, -3.353835e-01, -5.378438e…
## $ PC274 <dbl> 2.068973e-13, 1.046736e+00, -1.129663e+00, -5.376053e-1…
## $ PC275 <dbl> -5.006505e-13, -6.083612e+00, -1.488286e+00, -3.086006e…
## $ PC276 <dbl> -4.997061e-13, -6.718384e-01, 1.520541e+00, -6.543628e-…
## $ PC277 <dbl> 3.394243e-13, 3.292575e+00, -8.931513e-01, 1.217444e-12…
## $ PC278 <dbl> -3.645819e-14, -1.344339e+00, -4.024266e+00, -1.167151e…
## $ PC279 <dbl> 2.632300e-13, -1.081679e+00, 2.294405e+00, 1.911863e-12…
## $ PC280 <dbl> 2.034108e-14, 5.175591e+00, -4.730313e+00, -7.450328e-1…
## $ PC281 <dbl> -5.192295e-13, 2.753953e+00, 2.022668e+00, -4.891621e-1…
## $ PC282 <dbl> 1.943768e-13, -1.313238e+00, 1.395188e+00, 2.651543e-12…
## $ PC283 <dbl> 4.619602e-13, 2.075345e+00, 2.817176e+00, 1.417751e-12,…
## $ PC284 <dbl> 3.853956e-13, 6.636591e+00, -2.308326e+00, -6.659703e-1…
## $ PC285 <dbl> 2.109655e-13, -2.083343e-01, -2.508499e-01, -8.300424e-…
## $ PC286 <dbl> 4.230721e-14, 3.449020e+00, 8.533005e-01, -2.553658e-12…
## $ PC287 <dbl> -3.276824e-13, -3.251590e+00, 1.151648e+00, -2.017275e-…
## $ PC288 <dbl> -2.136375e-13, -4.463394e+00, -2.525710e+00, 2.135694e-…
## $ PC289 <dbl> 1.118237e-13, 1.101519e+00, -2.260978e+00, 1.734963e-12…
## $ PC290 <dbl> -2.000904e-14, -4.702403e+00, 1.692531e+00, 3.099149e-1…
## $ PC291 <dbl> -3.695807e-14, -8.039702e-01, 3.037284e-01, 4.362467e-1…
## $ PC292 <dbl> -3.861666e-14, -2.324691e+00, 2.553326e-01, -3.511306e-…
## $ PC293 <dbl> 1.254711e-13, -1.446749e-01, 5.810007e-01, 2.898260e-12…
## $ PC294 <dbl> 4.650764e-14, 2.004190e+00, -1.375500e+00, 2.996977e-12…
## $ PC295 <dbl> 4.429323e-13, -2.886743e+00, 9.719293e-01, 7.873766e-12…
## $ PC296 <dbl> -1.141885e-14, -4.107959e-01, -1.547576e+00, -4.400725e…
## $ PC297 <dbl> 2.200227e-13, -1.225073e+00, -1.267860e+00, 5.296767e-1…
## $ PC298 <dbl> -4.619274e-13, -5.361683e-01, 7.045472e-01, -8.033041e-…
## $ PC299 <dbl> 7.868535e-14, -7.928860e-01, -8.352721e-01, 4.670518e-1…
## $ PC300 <dbl> 3.162477e-13, 2.614843e-01, -4.985688e+00, 8.809558e-12…
## $ PC301 <dbl> 8.797869e-13, -1.068521e+00, -1.513437e+00, 6.128644e-1…
## $ PC302 <dbl> -2.749162e-14, 1.511249e-01, -1.900435e+00, -4.726299e-…
## $ PC303 <dbl> 5.251196e-13, -9.878842e-01, 4.079732e-02, 6.202328e-12…
## $ PC304 <dbl> -2.493274e-13, 4.074596e-01, 2.219608e+00, 3.074444e-12…
## $ PC305 <dbl> -2.752048e-03, -1.098190e+00, 6.887316e-01, 7.882089e-0…
## $ PC306 <dbl> -0.003539233, -2.098233402, -3.882046746, 0.022487714, …
## $ PC307 <dbl> -0.001827828, 3.784935379, 1.822634345, -0.041169414, -…
## $ PC308 <dbl> -0.0035419401, 3.5322109027, 0.9748611423, -0.158461089…
## $ PC309 <dbl> -0.0032030420, 2.0152389790, -0.5156415696, 0.113071152…
## $ PC310 <dbl> 0.005492634, 2.659002893, 2.530468925, 0.047746302, 0.0…
## $ PC311 <dbl> -0.001160866, -1.531758135, -3.418987665, -0.029453680,…
## $ PC312 <dbl> -0.006591333, -1.026135686, 0.546856830, 0.078239329, -…
## $ PC313 <dbl> -0.0005555683, -0.9534210888, -1.9786866349, -0.1665768…
## $ PC314 <dbl> 0.003490666, -0.676163106, 3.975044210, -0.035803803, -…
## $ PC315 <dbl> -0.0002580851, -2.3682427644, 1.5666406819, 0.002703384…
## $ PC316 <dbl> -0.0031456797, -0.6956393334, 0.6089733766, 0.009485282…
## $ PC317 <dbl> 1.010728e-02, 2.918643e+00, -1.139730e+00, -1.739954e-0…
## $ PC318 <dbl> 0.0136925748, 3.6196123289, 2.2452346605, 0.1230945291,…
## $ PC319 <dbl> 7.805497e-03, 2.197652e-01, -5.229190e-01, -4.956363e-0…
## $ PC320 <dbl> -6.681769e-05, -4.987003e-02, 5.912350e-01, 3.592559e-0…
## $ PC321 <dbl> -0.008849971, -1.594034638, -0.560857300, 0.103771029, …
## $ PC322 <dbl> 0.001073815, 1.782688346, 1.763470890, 0.042893626, -0.…
## $ PC323 <dbl> 0.004961900, 0.693097857, 1.322107033, -0.105125162, -0…
## $ PC324 <dbl> -0.008332585, -0.924604815, 0.035539458, -0.011160264, …
## $ PC325 <dbl> -0.0006209019, -1.2022431598, -0.9517389050, -0.0384782…
## $ PC326 <dbl> 0.005401590, 0.620433856, -0.618935812, -0.169672325, 0…
## $ PC327 <dbl> -0.0002318278, -0.7147232816, -0.0617406243, -0.0310831…
## $ PC328 <dbl> 0.0047634409, 1.5960195646, 1.6200357268, 0.0500769244,…
## $ PC329 <dbl> -0.0006632301, -0.7489418660, -2.1613597618, 0.03876811…
## $ PC330 <dbl> -0.0017860342, -0.0466816003, -1.0104508734, -0.0053120…
## $ PC331 <dbl> 0.001773990, -0.930440124, -1.447451792, -0.076891389, …
## $ PC332 <dbl> -0.005799567, -0.200600894, -0.783028034, -0.020112772,…
## $ PC333 <dbl> 0.0064600280, 0.7502750203, 0.9155327420, -0.0525169058…
## $ PC334 <dbl> -0.002157544, -0.415803957, -0.926700672, -0.031890267,…
## $ PC335 <dbl> -0.0008388314, 0.9492232389, 1.1839559924, -0.025439799…
## $ PC336 <dbl> -0.005691695, 0.714068835, 1.794194342, 0.168454490, -0…
## $ PC337 <dbl> 0.0037784456, -0.0899590406, -1.1350093128, -0.03856113…
## $ PC338 <dbl> -0.0006788045, 0.7083304418, 1.0474260650, 0.0687635470…
## $ PC339 <dbl> 0.0085777481, -0.0306833375, 5.8598633983, -0.028446321…
## $ PC340 <dbl> -0.007022861, 0.781735022, 2.333777026, -0.043889935, -…
## $ PC341 <dbl> -0.006835553, -1.730152799, 8.793915946, 0.001261258, 0…
## $ PC342 <dbl> 0.001484162, 1.327467871, 4.769473924, 0.011748773, 0.0…
## $ PC343 <dbl> -0.0006034311, 0.6190231417, -0.3461948187, 0.029709760…
## $ PC344 <dbl> -0.005195972, -0.856928759, -1.129122062, 0.004486849, …
## $ PC345 <dbl> 0.003265117, -2.159266713, -0.011422420, 0.006746204, 0…
## $ PC346 <dbl> 0.0026329011, -0.6160895880, -0.9471170527, -0.00948431…
## $ PC347 <dbl> 0.007916581, -1.295798726, 0.515576259, 0.056884071, 0.…
## $ PC348 <dbl> 0.002410618, 2.381510961, -0.850818927, -0.082057559, 0…
## $ PC349 <dbl> -7.212014e-03, 2.851473e-01, 2.771604e-01, 3.196899e-02…
## $ PC350 <dbl> -0.003575917, 0.466907172, 0.049324437, 0.009682000, -0…
## $ PC351 <dbl> -0.002977418, -0.072128445, -0.216280609, 0.029431768, …
## $ PC352 <dbl> -2.296895e-03, 2.895660e-01, -4.411920e-02, -3.338306e-…
## $ PC353 <dbl> 0.0009787517, 0.1576487503, -0.3391854905, -0.017136062…
## $ PC354 <dbl> -0.0006376691, -0.0037994360, -0.1879111539, 0.01433350…
## $ PC355 <dbl> 0.0006020932, -0.1712257896, 0.1230339855, 0.0467088114…
## $ PC356 <dbl> -5.743524e-05, -8.285974e-02, 8.402902e-02, 1.030330e-0…
## $ PC357 <dbl> 0.0049555655, 0.1177691393, -0.0322324629, 0.0068472419…
## $ PC358 <dbl> 6.386107e-05, 8.528096e-02, 2.308962e-01, 1.337644e-02,…
## $ PC359 <dbl> -4.353760e-02, -4.744780e-03, -2.997123e-02, -3.345015e…
## $ PC360 <dbl> -3.016789e-13, 3.388878e-12, -9.351191e-12, -9.420365e-…
## $ PC361 <dbl> -4.695421e-13, 2.085462e-11, 3.441747e-13, -3.800931e-1…
## $ PC362 <dbl> 5.510182e-13, -1.195956e-11, -9.542551e-12, 6.950169e-1…
## $ PC363 <dbl> 1.098691e-13, -3.574859e-12, -6.916146e-12, 1.567590e-1…
## $ PC364 <dbl> -5.521494e-13, 1.307293e-11, 2.128134e-11, -2.881991e-1…
## $ PC365 <dbl> 8.227010e-13, 2.975602e-11, 1.557617e-11, 6.094720e-12,…
## $ PC366 <dbl> -4.684530e-13, -5.511271e-12, -2.120127e-11, -2.218969e…
## $ PC367 <dbl> -4.714613e-13, -1.856394e-11, -1.074940e-11, -4.463169e…
## $ PC368 <dbl> 1.658082e-12, 2.294873e-11, 2.682899e-12, 8.957725e-12,…
## $ PC369 <dbl> 3.370127e-13, -7.465780e-13, -1.957091e-11, 2.742106e-1…
## $ PC370 <dbl> 5.024913e-13, 6.085573e-12, -5.232142e-12, 2.251689e-12…
## $ PC371 <dbl> 3.755635e-13, 8.588905e-12, 1.775505e-13, 4.508681e-12,…
## $ PC372 <dbl> -4.870356e-14, 5.098481e-12, 3.699921e-11, -5.833349e-1…
## $ PC373 <dbl> 1.347765e-13, -5.259675e-12, -1.049113e-11, 1.624689e-1…
## $ PC374 <dbl> 2.015205e-13, -7.931529e-13, 1.873245e-12, 4.838208e-13…
## $ PC375 <dbl> 8.373806e-13, 8.599259e-12, 1.855729e-11, 5.512483e-12,…
## $ PC376 <dbl> -2.993702e-13, 6.209966e-12, -1.236690e-11, -3.207131e-…
## $ PC377 <dbl> 6.262152e-14, -4.448341e-11, -6.203892e-11, 9.991606e-1…
## $ PC378 <dbl> 5.055672e-15, -1.488483e-11, 1.127107e-11, -1.819656e-1…
## $ PC379 <dbl> 7.304455e-14, 1.202862e-11, -2.122429e-11, -7.561394e-1…
## $ PC380 <dbl> -6.223601e-14, -1.666430e-11, -1.558682e-11, -1.804311e…
## $ PC381 <dbl> 2.863111e-13, 2.333011e-12, 2.690000e-11, 4.097401e-13,…
## $ PC382 <dbl> 7.805000e-14, -1.363604e-11, -5.358467e-12, 3.842669e-1…
## $ PC383 <dbl> 1.290817e-13, 5.313745e-12, 2.617461e-12, 8.729034e-13,…
## $ PC384 <dbl> 1.209060e-13, -1.414361e-11, 1.583384e-12, 5.197519e-13…
## $ PC385 <dbl> -4.065136e-15, -3.048828e-12, -2.314881e-11, -6.446881e…
## $ PC386 <dbl> 3.027766e-14, 2.105315e-12, -1.292603e-11, -9.139289e-1…
## $ PC387 <dbl> -9.198047e-14, 1.595923e-11, -5.182389e-12, 2.357712e-1…
## $ PC388 <dbl> -5.387720e-13, -1.448850e-11, -8.990924e-12, -2.838096e…
## $ PC389 <dbl> 2.738009e-13, 1.351633e-13, 2.910701e-12, 3.343689e-12,…
## $ PC390 <dbl> -2.324179e-13, 6.857801e-12, -1.479998e-12, -9.230356e-…
## $ PC391 <dbl> 2.889298e-13, -5.050689e-13, -7.846528e-12, 2.637003e-1…
## $ PC392 <dbl> -3.876843e-13, -7.836867e-12, 5.035355e-12, -5.686987e-…
## $ PC393 <dbl> 6.910310e-14, -6.254076e-11, 2.054020e-11, -4.464290e-1…
## $ PC394 <dbl> -4.321226e-14, 9.067120e-12, -7.160536e-12, -1.080528e-…
## $ PC395 <dbl> -3.154492e-13, 2.429443e-12, -4.029608e-12, 6.245524e-1…
## $ PC396 <dbl> -2.968354e-13, 1.376641e-11, 2.731950e-12, -1.143731e-1…
## $ PC397 <dbl> 4.887632e-14, 5.757585e-13, 1.566592e-11, -1.411584e-12…
## $ PC398 <dbl> 4.032365e-13, 1.586060e-11, -1.199829e-12, 3.712513e-12…
## $ PC399 <dbl> -2.621817e-13, -2.287813e-14, 8.994870e-12, -1.417837e-…
## $ PC400 <dbl> -1.848229e-13, 7.795712e-12, 8.914404e-13, -1.579756e-1…
## $ PC401 <dbl> 3.319975e-13, 9.721885e-14, 9.525108e-12, 4.232176e-12,…
## $ PC402 <dbl> -6.643963e-14, 9.137490e-13, 1.437141e-11, 6.108884e-13…
## $ PC403 <dbl> -7.241014e-14, -7.122198e-12, 8.390923e-12, 1.917778e-1…
## $ PC404 <dbl> -2.076459e-13, -1.326651e-11, -1.890053e-11, -1.353809e…
## $ PC405 <dbl> 9.984112e-14, 1.076164e-11, 1.095611e-11, -5.456981e-13…
## $ PC406 <dbl> -2.040290e-13, -1.127584e-12, -2.669729e-11, -2.361269e…
## $ PC407 <dbl> -2.885431e-14, -3.865085e-12, 9.131689e-12, 6.653126e-1…
## $ PC408 <dbl> -1.030434e-13, -8.496198e-12, 9.502583e-12, -2.431212e-…
## $ PC409 <dbl> 4.936651e-13, -1.982776e-11, 3.646660e-12, 3.547823e-12…
## $ PC410 <dbl> -2.494358e-13, 3.740589e-11, -2.500964e-11, -1.658755e-…
## $ PC411 <dbl> 5.782808e-13, -4.954075e-12, 8.309358e-12, 5.498782e-12…
## $ PC412 <dbl> 6.529677e-13, 1.026938e-11, -2.132102e-11, 5.152598e-12…
## $ PC413 <dbl> -9.319094e-15, -1.021062e-11, -1.036303e-11, 2.637021e-…
## $ PC414 <dbl> 1.074379e-13, 2.009443e-12, -2.737501e-12, 4.257007e-13…
## $ PC415 <dbl> 2.456300e-13, -8.197998e-12, 1.046542e-11, 2.314253e-12…
## $ PC416 <dbl> 1.035026e-13, -1.219996e-11, -2.170164e-12, 1.818256e-1…
## $ PC417 <dbl> 2.030742e-14, -8.001932e-13, -2.502575e-12, 3.495371e-1…
## $ PC418 <dbl> -3.187960e-14, -6.870018e-12, -9.531009e-12, -2.678159e…
## $ PC419 <dbl> -2.100020e-13, 7.667999e-13, 9.093606e-12, -2.022597e-1…
## $ PC420 <dbl> -2.176478e-14, 1.014556e-12, 5.184245e-12, -3.684389e-1…
## $ PC421 <dbl> -1.629617e-13, 9.193772e-12, 6.886857e-12, -2.346597e-1…
## $ PC422 <dbl> -1.535061e-13, -2.754833e-14, 3.076922e-12, -2.085259e-…
## $ PC423 <dbl> -2.647448e-13, -2.166009e-12, 4.148851e-12, -2.178127e-…
## $ PC424 <dbl> -1.645318e-13, -1.043314e-11, 2.158546e-12, -2.138526e-…
## $ PC425 <dbl> 1.495654e-13, -3.044092e-12, -6.371723e-12, 6.768693e-1…
## $ PC426 <dbl> -2.635363e-14, -5.126138e-12, -4.661435e-12, -2.708750e…
## $ PC427 <dbl> -2.381190e-13, 9.649910e-12, 5.203765e-13, -1.781100e-1…
## $ PC428 <dbl> 9.786843e-14, 2.941582e-11, -3.961682e-12, 2.110994e-12…
## $ PC429 <dbl> 5.208849e-13, 9.709235e-14, 1.535821e-12, 4.378511e-12,…
## $ PC430 <dbl> 3.362871e-13, -1.647942e-12, -5.066989e-12, 1.665037e-1…
## $ PC431 <dbl> 2.191638e-13, 8.252730e-12, 1.125049e-11, 2.042806e-12,…
## $ PC432 <dbl> 1.077374e-13, 1.449400e-11, 1.457628e-11, 1.246361e-12,…
## $ PC433 <dbl> -1.429901e-13, 3.113811e-12, -1.410831e-11, -1.120820e-…
## $ PC434 <dbl> 5.683753e-13, 4.616167e-12, -4.892800e-12, 4.439452e-12…
## $ PC435 <dbl> 2.892847e-13, 2.562475e-13, -8.129247e-12, 1.958642e-12…
## $ PC436 <dbl> -2.337293e-13, -7.950577e-12, -1.679863e-11, -2.488724e…
## $ PC437 <dbl> 9.552409e-14, 2.263903e-12, 8.785037e-12, 1.575479e-12,…
## $ PC438 <dbl> 6.859279e-14, 3.525266e-12, 1.282669e-12, 2.693896e-12,…
## $ PC439 <dbl> -2.295937e-13, -1.015567e-11, 5.735318e-12, -1.804341e-…
## $ PC440 <dbl> -1.067646e-13, -6.107059e-12, -4.222259e-12, -1.797544e…
## $ PC441 <dbl> -1.717900e-13, 1.256920e-11, -7.204855e-12, -5.079482e-…
## $ PC442 <dbl> 2.287689e-13, 3.267005e-12, 9.140455e-12, 1.940192e-12,…
## $ PC443 <dbl> 1.113840e-13, 2.931141e-12, 3.455070e-12, -7.414893e-13…
## $ PC444 <dbl> 2.654062e-13, 1.229516e-11, 6.206001e-14, 1.674765e-12,…
library(usemodels)
# Print usemodels' suggested xgboost scaffolding (recipe, model spec, workflow
# and tuning call) for predicting `classification` from every other column of
# `data_train`. The result is not assigned; the printed template is reference
# material only.
usemodels::use_xgboost(
  classification ~ .,
  data = data_train
)
## xgboost_recipe <-
## recipe(formula = classification ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(66780)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Boosted-tree classifier on the xgboost engine. Only the number of trees and
# the tree depth are flagged for tuning; no other hyperparameter is set here.
xgboost_spec <- boost_tree(
  mode = "classification",
  engine = "xgboost",
  trees = tune(),
  tree_depth = tune()
)

# Pair the preprocessing recipe with the model spec so both are tuned and
# fitted as a single unit.
xgboost_workflow <- workflow(
  preprocessor = xgboost_rec,
  spec = xgboost_spec
)
library(doParallel)
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
## Loading required package: parallel
library(foreach)
# Tune `trees` and `tree_depth` over the resamples in `data_cv`.
#
# No explicit grid object is built: `grid = 5` asks tune to generate five
# candidate parameter combinations itself, which is what the metrics table
# printed after this block reports.
doParallel::registerDoParallel()  # register a parallel foreach backend

set.seed(2242)  # fixed seed so the tuning run is repeatable
xgboost_tune <- tune_grid(
  xgboost_workflow,
  resamples = data_cv,
  grid = 5,
  control = control_grid(save_pred = TRUE)  # keep per-resample predictions
)

# Resampled performance of every candidate (mean, n and std_err per metric).
collect_metrics(xgboost_tune)
## # A tibble: 15 × 8
## trees tree_depth .metric .estimator mean n std_err .config
## <int> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 1812 3 accuracy binary 0.508 10 0.0111 Preprocessor1_Mo…
## 2 1812 3 brier_class binary 0.410 10 0.00743 Preprocessor1_Mo…
## 3 1812 3 roc_auc binary 0.511 10 0.0111 Preprocessor1_Mo…
## 4 795 4 accuracy binary 0.496 10 0.00951 Preprocessor1_Mo…
## 5 795 4 brier_class binary 0.408 10 0.00935 Preprocessor1_Mo…
## 6 795 4 roc_auc binary 0.508 10 0.0117 Preprocessor1_Mo…
## 7 1435 7 accuracy binary 0.498 10 0.0130 Preprocessor1_Mo…
## 8 1435 7 brier_class binary 0.395 10 0.00912 Preprocessor1_Mo…
## 9 1435 7 roc_auc binary 0.506 10 0.0104 Preprocessor1_Mo…
## 10 53 10 accuracy binary 0.491 10 0.0126 Preprocessor1_Mo…
## 11 53 10 brier_class binary 0.362 10 0.00914 Preprocessor1_Mo…
## 12 53 10 roc_auc binary 0.498 10 0.0143 Preprocessor1_Mo…
## 13 878 13 accuracy binary 0.487 10 0.0109 Preprocessor1_Mo…
## 14 878 13 brier_class binary 0.404 10 0.00809 Preprocessor1_Mo…
## 15 878 13 roc_auc binary 0.509 10 0.0119 Preprocessor1_Mo…
# Per-resample ROC curves for a single tuning candidate.
#
# The saved predictions hold one copy of every assessment row for each
# candidate, so they are first restricted to the candidate with the best
# resampled accuracy (the same criterion used to finalize the workflow).
# Without that filter, each curve would pool probabilities produced by several
# different models.
collect_predictions(
  xgboost_tune,
  parameters = select_best(xgboost_tune, metric = "accuracy")
) %>%
  group_by(id) %>%
  roc_curve(classification, .pred_possible) %>%
  autoplot()
# Plug the candidate with the highest resampled accuracy into the workflow,
# then run the final fit/evaluation on `data_split`.
# NOTE(review): the recorded run warns that `county` has levels not seen
# before; the recipe behind `xgboost_workflow` is defined outside this block,
# so that is where step_novel() would need to be added.
xgboost_last <- last_fit(
  finalize_workflow(
    xgboost_workflow,
    select_best(xgboost_tune, metric = "accuracy")
  ),
  data_split
)
## → A | warning: ! There are new levels in `county`: "Westchester County", "Transylvania
## County", "Sampson County", "Robeson County", "Wilkes County", "Duplin
## County", "Sarpy County", "Tioga County", "Ross County", "Payne County",
## "Tulsa County", "Athens County", "Hughes County", "Logan County", "McClain
## County", "Harney County", "Potter County", "Mobile County", …, "McLean
## County", and "Bourbon County".
## ℹ Consider using step_novel() (`?recipes::step_novel()`) \ before
## `step_dummy()` to handle unseen values.
##
There were issues with some computations A: x1
There were issues with some computations A: x1
# Metrics computed by the final fit.
xgboost_last %>%
  collect_metrics()
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy binary 0.523 Preprocessor1_Model1
## 2 roc_auc binary 0.529 Preprocessor1_Model1
## 3 brier_class binary 0.413 Preprocessor1_Model1
# Confusion matrix of the final model's class predictions against the true
# `classification` labels, drawn via autoplot().
autoplot(
  yardstick::conf_mat(
    collect_predictions(xgboost_last),
    truth = classification,
    estimate = .pred_class
  )
)
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
# Variable-importance plot for the fitted engine object extracted from the
# final fit.
vip(workflows::extract_fit_engine(xgboost_last))
Step normalize had a negative effect on the model. Adding step YeoJohnson helped the model show a bit of improvement, to .420. Step pca helped get the model up to an accuracy of .523. I put the threshold at .75 because .50 and .99 made the model worse. Algorithm tuning had no positive effect on the model. Overall, step pca and step YeoJohnson helped the model the most, but the model is still not that great.