library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
stock_df <- read_csv("stock_df.csv")
## Rows: 5 Columns: 106
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): company
## dbl (105): 2019_week1, 2019_week2, 2019_week3, 2019_week4, 2019_week5, 2019_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stock_df_long <- stock_df %>%
pivot_longer(
cols = starts_with("2019_week"), # select all columns starting with "2019_week"
names_to = c("year", "week"), # create two new columns
names_pattern = "(\\d{4})_week(\\d+)", # extract year and week number using regex
values_to = "price" # name for the numeric values
) %>%
mutate(
year = as.integer(year),
week = as.integer(week)
)
stock_df_long <- stock_df %>%
pivot_longer(
cols = matches("\\d{4}_week\\d+"), # match all columns like "2019_week1", "2020_week3", etc.
names_to = c("year", "week"),
names_pattern = "(\\d{4})_week(\\d+)",
values_to = "price"
) %>%
mutate(
year = as.integer(year),
week = as.integer(week)
)
stock_df_long
## # A tibble: 525 × 4
## company year week price
## <chr> <int> <int> <dbl>
## 1 Amazon 2019 1 1848.
## 2 Amazon 2019 2 1641.
## 3 Amazon 2019 3 1696.
## 4 Amazon 2019 4 1671.
## 5 Amazon 2019 5 1626.
## 6 Amazon 2019 6 1588.
## 7 Amazon 2019 7 1608.
## 8 Amazon 2019 8 1632.
## 9 Amazon 2019 9 1672.
## 10 Amazon 2019 10 1621.
## # ℹ 515 more rows