# Read the TidyTuesday (2022-05-10) NYT titles TSV straight from GitHub.
nyt_titles <- readr::read_tsv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2022/2022-05-10/nyt_titles.tsv"
)
## Rows: 7431 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): title, author
## dbl (5): id, year, total_weeks, debut_rank, best_rank
## date (1): first_week
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a per-column summary of nyt_titles (the "Data summary" output below).
skimr::skim(nyt_titles)
Data summary

|                          |            |
|:-------------------------|:-----------|
| Name                     | nyt_titles |
| Number of rows           | 7431       |
| Number of columns        | 8          |
| _______________________  |            |
| Column type frequency:   |            |
| character                | 2          |
| Date                     | 1          |
| numeric                  | 5          |
| ________________________ |            |
| Group variables          | None       |
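Variable type: character

| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:--------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| title         |         0 |             1 |   1 |  74 |     0 |     7172 |          0 |
| author        |         4 |             1 |   4 |  73 |     0 |     2205 |          0 |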
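Variable type: Date

| skim_variable | n_missing | complete_rate | min        | max        | median     | n_unique |
|:--------------|----------:|--------------:|:-----------|:-----------|:-----------|---------:|
| first_week    |         0 |             1 | 1931-10-12 | 2020-12-06 | 2000-06-25 |     3348 |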
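Variable type: numeric

| skim_variable | n_missing | complete_rate |    mean |      sd |   p0 |    p25 |  p50 |    p75 | p100 | hist  |
|:--------------|----------:|--------------:|--------:|--------:|-----:|-------:|-----:|-------:|-----:|:------|
| id            |         0 |             1 | 3715.00 | 2145.29 |    0 | 1857.5 | 3715 | 5572.5 | 7430 | ▇▇▇▇▇ |
| year          |         0 |             1 | 1989.61 |   26.23 | 1931 | 1968.0 | 2000 | 2011.0 | 2020 | ▂▂▂▃▇ |
| total_weeks   |         0 |             1 |    8.13 |   11.21 |    1 |    2.0 |    4 |   10.0 |  178 | ▇▁▁▁▁ |
| debut_rank    |         0 |             1 |    7.90 |    4.57 |    1 |    4.0 |    8 |   12.0 |   17 | ▇▆▅▅▅ |
| best_rank     |         0 |             1 |    6.91 |    4.57 |    1 |    3.0 |    6 |   10.0 |   17 | ▇▅▃▃▂ |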
# Prepare the analysis table from nyt_titles: prune columns, drop incomplete
# rows, and put total_weeks on a log scale.
data1 <- nyt_titles %>%
  # Remove the author, id, title and year columns
  select(-c(author, id, title, year)) %>%
  # Treat missing values: discard any row that still contains an NA
  na.omit() %>%
  # Log-transform total_weeks, whose distribution is positively skewed
  mutate(total_weeks = log(total_weeks))
# Step 1: binarize -- drop the first_week date column, then pass the remaining
# columns to binarize() with its default settings.
data_binarized_tbl1 <- binarize(select(data1, -first_week))
# Step 2: correlate the binarized columns against one total_weeks bin.
# NOTE(review): the target column name embeds the bin edges at full
# floating-point precision (they look like log(2) and log(4)); it will
# presumably stop matching if the data or the binning changes -- verify
# against names(data_binarized_tbl1) when re-running.
data_corr_tbl1 <- correlate(
  data_binarized_tbl1,
  target = total_weeks__0.693147180559945_1.38629436111989
)

# Step 3: draw the correlation funnel for the result.
plot_correlation_funnel(data_corr_tbl1)
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the correlationfunnel package.
## Please report the issue at
## <https://github.com/business-science/correlationfunnel/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the correlationfunnel package.
## Please report the issue at
## <https://github.com/business-science/correlationfunnel/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
