library(readr)
library(pander)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
stocks <- read_rds("https://github.com/byuistats/data/raw/master/Dart_Expert_Dow_6month_anova/Dart_Expert_Dow_6month_anova.RDS")
library(tidyr)
library(dplyr)

# Separate the contest_period into two columns
stocks1 <- stocks %>%
  separate(contest_period, into = c("start_period", "end_period"), sep = "-")

# Extract month and year from the "end_period"
stocks1$month_end <- sub("[0-9]+$", "", stocks1$end_period)  # Extract month name
stocks1$year_end <- sub("^[^0-9]+", "", stocks1$end_period)  # Extract year

# Ensure month names are capitalized for consistency
stocks1$month_end <- tools::toTitleCase(stocks1$month_end)
saveRDS(stocks1, file = "tidy_stocks.rds")
# Step 1: Summarize the data to avoid duplicates and remove invalid rows
stocks_summarized <- stocks1 %>%
  group_by(month_end, year_end) %>%
  summarize(value = mean(value, na.rm = TRUE))  # Averaging duplicate values
## `summarise()` has grouped output by 'month_end'. You can override using the
## `.groups` argument.
# Step 2: Filter out incorrect month names (fix spelling and remove invalid months)
valid_months <- c("January", "February", "March", "April", "May", "June",
                  "July", "August", "September", "October", "November", "December")

stocks_summarized <- stocks_summarized %>%
  filter(month_end %in% valid_months)

# Step 3: Pivot the data wider, with months as the first column and years as subsequent columns
stocks_wide <- stocks_summarized %>%
  pivot_wider(
    names_from = year_end,    # The years will become column headers
    values_from = value       # The values to fill in the table
  ) %>%
  arrange(factor(month_end, levels = month.name))  # Ensure months are in the correct order

# Step 4: Replace NA values with dashes
stocks_wide <- stocks_wide %>%
  mutate(across(where(is.numeric), ~ ifelse(is.na(.), "-", as.character(round(., 1)))))  # Round to 1 decimal place

# Step 5: Rename the month_end column to "Month"
stocks_wide <- stocks_wide %>%
  rename(Month = month_end)

# View the final result
pander(stocks_wide)
Month 1991 1992 1993 1994 1995 1996 1997 1998 1990
January -19.5 10.6 0.3 19.2 8.3 13.6 10.7 1.7 -
February 15.8 23.8 -2.9 3.6 - 18.7 30.8 6.1 -
March 15.7 11.2 17.6 -1 -1.2 23.5 -5.8 8.8 -
April 46.6 1.4 -3.9 -1.5 8.8 16.7 -2.7 9.8 -
May 33.6 8 13.7 -2.3 13.1 7.5 0.8 13 -
June 18 -3.2 1.9 -9.3 33.5 -1.4 18.1 14.2 5.1
July 23.3 2.2 -3.4 0.5 22.2 -1.6 12.1 12.1 13.2
August 38.4 0.3 0.7 8.5 17.8 8.9 9.7 -17.4 -4.7
September 7.3 -4.7 -3.3 11.9 30.8 7.4 35.4 -22 -12.1
October 18.3 -1.5 1.9 14.8 12.8 5.4 20.7 - -20.9
November -7.8 -0.3 -8 10 22.6 8.8 2.3 - -24.5
December 7.7 5.4 - -2.3 5.7 21.2 -2.8 - -14