library(readr)
library(pander)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
stocks <- read_rds("https://github.com/byuistats/data/raw/master/Dart_Expert_Dow_6month_anova/Dart_Expert_Dow_6month_anova.RDS")
library(tidyr)
library(dplyr)
# Separate the contest_period into two columns
stocks1 <- stocks %>%
separate(contest_period, into = c("start_period", "end_period"), sep = "-")
# Extract month and year from the "end_period"
stocks1$month_end <- sub("[0-9]+$", "", stocks1$end_period) # Extract month name
stocks1$year_end <- sub("^[^0-9]+", "", stocks1$end_period) # Extract year
# Ensure month names are capitalized for consistency
stocks1$month_end <- tools::toTitleCase(stocks1$month_end)
saveRDS(stocks1, file = "tidy_stocks.rds")
# Step 1: Summarize the data to avoid duplicates and remove invalid rows
stocks_summarized <- stocks1 %>%
group_by(month_end, year_end) %>%
summarize(value = mean(value, na.rm = TRUE)) # Averaging duplicate values
## `summarise()` has grouped output by 'month_end'. You can override using the
## `.groups` argument.
# Step 2: Filter out incorrect month names (fix spelling and remove invalid months)
valid_months <- c("January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December")
stocks_summarized <- stocks_summarized %>%
filter(month_end %in% valid_months)
# Step 3: Pivot the data wider, with months as the first column and years as subsequent columns
stocks_wide <- stocks_summarized %>%
pivot_wider(
names_from = year_end, # The years will become column headers
values_from = value # The values to fill in the table
) %>%
arrange(factor(month_end, levels = month.name)) # Ensure months are in the correct order
# Step 4: Replace NA values with dashes
stocks_wide <- stocks_wide %>%
mutate(across(where(is.numeric), ~ ifelse(is.na(.), "-", as.character(round(., 1))))) # Round to 1 decimal place
# Step 5: Rename the month_end column to "Month"
stocks_wide <- stocks_wide %>%
rename(Month = month_end)
# View the final result
pander(stocks_wide)
| January |
-19.5 |
10.6 |
0.3 |
19.2 |
8.3 |
13.6 |
10.7 |
1.7 |
- |
| February |
15.8 |
23.8 |
-2.9 |
3.6 |
- |
18.7 |
30.8 |
6.1 |
- |
| March |
15.7 |
11.2 |
17.6 |
-1 |
-1.2 |
23.5 |
-5.8 |
8.8 |
- |
| April |
46.6 |
1.4 |
-3.9 |
-1.5 |
8.8 |
16.7 |
-2.7 |
9.8 |
- |
| May |
33.6 |
8 |
13.7 |
-2.3 |
13.1 |
7.5 |
0.8 |
13 |
- |
| June |
18 |
-3.2 |
1.9 |
-9.3 |
33.5 |
-1.4 |
18.1 |
14.2 |
5.1 |
| July |
23.3 |
2.2 |
-3.4 |
0.5 |
22.2 |
-1.6 |
12.1 |
12.1 |
13.2 |
| August |
38.4 |
0.3 |
0.7 |
8.5 |
17.8 |
8.9 |
9.7 |
-17.4 |
-4.7 |
| September |
7.3 |
-4.7 |
-3.3 |
11.9 |
30.8 |
7.4 |
35.4 |
-22 |
-12.1 |
| October |
18.3 |
-1.5 |
1.9 |
14.8 |
12.8 |
5.4 |
20.7 |
- |
-20.9 |
| November |
-7.8 |
-0.3 |
-8 |
10 |
22.6 |
8.8 |
2.3 |
- |
-24.5 |
| December |
7.7 |
5.4 |
- |
-2.3 |
5.7 |
21.2 |
-2.8 |
- |
-14 |