# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
This analysis uses weekly closing prices for five US-listed companies, beginning in 2019 (105 weekly observations per company).
# Read the wide-format price table from the CSV in the working directory
stock_df <- read_csv(file = "stock_df.csv")
## Rows: 5 Columns: 106
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): company
## dbl (105): 2019_week1, 2019_week2, 2019_week3, 2019_week4, 2019_week5, 2019_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the data structure: auto-print the tibble to preview its dimensions,
# column types, and the first few weekly price columns
stock_df
## # A tibble: 5 × 106
## company `2019_week1` `2019_week2` `2019_week3` `2019_week4` `2019_week5`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Amazon 1848. 1641. 1696. 1671. 1626.
## 2 Apple 73.4 38.1 39.2 39.4 41.6
## 3 Facebook 205. 144. 150. 149. 166.
## 4 Google 1337. 1057. 1098. 1091. 1111.
## 5 Microsoft 158. 103. 108. 107. 103.
## # ℹ 100 more variables: `2019_week6` <dbl>, `2019_week7` <dbl>,
## # `2019_week8` <dbl>, `2019_week9` <dbl>, `2019_week10` <dbl>,
## # `2019_week11` <dbl>, `2019_week12` <dbl>, `2019_week13` <dbl>,
## # `2019_week14` <dbl>, `2019_week15` <dbl>, `2019_week16` <dbl>,
## # `2019_week17` <dbl>, `2019_week18` <dbl>, `2019_week19` <dbl>,
## # `2019_week20` <dbl>, `2019_week21` <dbl>, `2019_week22` <dbl>,
## # `2019_week23` <dbl>, `2019_week24` <dbl>, `2019_week25` <dbl>, …
The original dataset contains:

- Rows: 5 companies (Amazon, Apple, Facebook, Google, Microsoft)
- Columns: 106 (1 company name column + 105 week columns)
- Format: wide, with each week stored as a separate column
# Pivot the weekly price columns into rows. Every column other than `company`
# is named "<year>_week<week>", so each name is split on "_week" into integer
# `year` and `week` columns, and the cell value is stored in `price`.
stock_df_long <- pivot_longer(
  data = stock_df,
  cols = -company,
  names_to = c("year", "week"),
  names_sep = "_week",
  names_transform = list(year = as.integer, week = as.integer),
  values_to = "price"
)

# Show the long-format result (one row per company, year, and week)
stock_df_long
## # A tibble: 525 × 4
## company year week price
## <chr> <int> <int> <dbl>
## 1 Amazon 2019 1 1848.
## 2 Amazon 2019 2 1641.
## 3 Amazon 2019 3 1696.
## 4 Amazon 2019 4 1671.
## 5 Amazon 2019 5 1626.
## 6 Amazon 2019 6 1588.
## 7 Amazon 2019 7 1608.
## 8 Amazon 2019 8 1632.
## 9 Amazon 2019 9 1672.
## 10 Amazon 2019 10 1621.
## # ℹ 515 more rows
# Display summary statistics
cat("Total observations:", nrow(stock_df_long), "\n")
## Total observations: 525
cat("Number of companies:", n_distinct(stock_df_long$company), "\n")
## Number of companies: 5
# Count distinct (year, week) pairs rather than distinct week numbers: week
# numbers repeat from one year to the next, so n_distinct(week) alone reports
# 53 even though each company has 105 weekly observations.
cat(
  "Number of weeks:",
  n_distinct(stock_df_long$year, stock_df_long$week),
  "\n\n"
)
## Number of weeks: 105
# Per-company price summary: lowest, average, and highest weekly price, plus
# the number of weekly observations behind each figure
summarise(
  group_by(stock_df_long, company),
  min_price = min(price),
  mean_price = mean(price),
  max_price = max(price),
  observations = n()
)
## # A tibble: 5 × 5
## company min_price mean_price max_price observations
## <chr> <dbl> <dbl> <dbl> <int>
## 1 Amazon 1588. 2235. 3402. 105
## 2 Apple 38.1 74.2 133. 105
## 3 Facebook 144. 209. 294. 105
## 4 Google 1057. 1341. 1828. 105
## 5 Microsoft 103. 163. 229. 105
# Create line plot, one panel per year.
# The data holds 105 weekly observations per company but only 53 distinct week
# numbers, so week numbers repeat across years. Drawing every year on a single
# week axis would make each company's line jump back and forth between years
# at the same x position; faceting by year keeps each line chronological.

# Year (or "first-last" year range) present in the data, used in the title so
# it is not hard-coded to a single year.
plot_years <- paste(unique(range(stock_df_long$year)), collapse = "-")

ggplot(stock_df_long, aes(x = week, y = price, color = company)) +
  geom_line(linewidth = 1) +
  facet_wrap(vars(year)) +
  labs(
    title = paste0(
      "Weekly Stock Prices for US Tech Companies (", plot_years, ")"
    ),
    x = "Week Number",
    y = "Stock Price (USD)",
    color = "Company"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "bottom"
  )
The data has been successfully reshaped from wide format (5 rows × 106 columns) to long format (525 rows × 4 columns), making it more suitable for analysis and visualization using tidyverse tools.