# Load required libraries
library(tidyverse)
library(dplyr)
First, we import the stock price data from the CSV file:
# Import the stock data
stock_df <- read_csv("stock_df.csv")
# Display the structure of the data
glimpse(stock_df)
## Rows: 5
## Columns: 106
## $ company <chr> "Amazon", "Apple", "Facebook", "Google", "Microsoft"
## $ `2019_week1` <dbl> 1847.8400, 73.4125, 205.2500, 1337.0200, 157.7000
## $ `2019_week2` <dbl> 1640.5601, 38.0725, 143.8000, 1057.1899, 102.8000
## $ `2019_week3` <dbl> 1696.200, 39.205, 150.040, 1098.260, 107.710
## $ `2019_week4` <dbl> 1670.57, 39.44, 149.01, 1090.99, 107.17
## $ `2019_week5` <dbl> 1626.23, 41.63, 165.71, 1110.75, 102.78
## $ `2019_week6` <dbl> 1588.2200, 42.6025, 167.3300, 1095.0601, 105.6700
## $ `2019_week7` <dbl> 1607.950, 42.605, 162.500, 1113.650, 108.220
## $ `2019_week8` <dbl> 1631.5601, 43.2425, 161.8900, 1110.3700, 110.9700
## $ `2019_week9` <dbl> 1671.7300, 43.7425, 162.2800, 1140.9900, 112.5300
## $ `2019_week10` <dbl> 1620.8000, 43.2275, 169.6000, 1142.3199, 110.5100
## $ `2019_week11` <dbl> 1712.36, 46.53, 165.98, 1184.46, 115.91
## $ `2019_week12` <dbl> 1764.7700, 47.7625, 164.3400, 1205.5000, 117.0500
## $ `2019_week13` <dbl> 1780.7500, 47.4875, 166.6900, 1173.3101, 117.9400
## $ `2019_week14` <dbl> 1837.28, 49.25, 175.72, 1207.15, 119.89
## $ `2019_week15` <dbl> 1843.0601, 49.7175, 179.1000, 1217.8700, 120.9500
## $ `2019_week16` <dbl> 1861.690, 50.965, 178.280, 1236.370, 123.370
## $ `2019_week17` <dbl> 1950.630, 51.075, 191.490, 1272.180, 129.890
## $ `2019_week18` <dbl> 1962.4600, 52.9375, 195.4700, 1185.4000, 128.9000
## $ `2019_week19` <dbl> 1889.980, 49.295, 188.340, 1164.270, 127.130
## $ `2019_week20` <dbl> 1869.00, 47.25, 185.30, 1162.30, 128.07
## $ `2019_week21` <dbl> 1823.2800, 44.7425, 181.0600, 1133.4700, 126.2400
## $ `2019_week22` <dbl> 1775.0699, 43.7675, 177.4700, 1103.6300, 123.6800
## $ `2019_week23` <dbl> 1804.0300, 47.5375, 173.3500, 1066.0400, 131.4000
## $ `2019_week24` <dbl> 1869.670, 48.185, 181.330, 1085.350, 132.450
## $ `2019_week25` <dbl> 1911.300, 49.695, 191.140, 1121.880, 136.970
## $ `2019_week26` <dbl> 1893.63, 49.48, 193.00, 1080.91, 133.96
## $ `2019_week27` <dbl> 1942.9100, 51.0575, 196.4000, 1131.5900, 137.0600
## $ `2019_week28` <dbl> 2011.000, 50.825, 204.870, 1144.900, 138.900
## $ `2019_week29` <dbl> 1964.5200, 50.6475, 198.3600, 1130.1000, 136.6200
## $ `2019_week30` <dbl> 1943.050, 51.935, 199.750, 1250.410, 141.340
## $ `2019_week31` <dbl> 1823.240, 51.005, 189.020, 1193.990, 136.900
## $ `2019_week32` <dbl> 1807.5800, 50.2475, 187.8500, 1188.0100, 137.7100
## $ `2019_week33` <dbl> 1792.570, 51.625, 183.700, 1177.600, 136.130
## $ `2019_week34` <dbl> 1749.62, 50.66, 177.75, 1151.29, 133.39
## $ `2019_week35` <dbl> 1776.290, 52.185, 185.670, 1188.100, 137.860
## $ `2019_week36` <dbl> 1833.510, 53.315, 187.490, 1204.930, 139.100
## $ `2019_week37` <dbl> 1839.3400, 54.6875, 187.1900, 1239.5601, 137.3200
## $ `2019_week38` <dbl> 1794.1600, 54.4325, 189.9300, 1229.9301, 139.4400
## $ `2019_week39` <dbl> 1725.450, 54.705, 177.100, 1225.090, 137.730
## $ `2019_week40` <dbl> 1739.6500, 56.7525, 180.4500, 1209.0000, 138.1200
## $ `2019_week41` <dbl> 1731.9200, 59.0525, 184.1900, 1215.4500, 139.6800
## $ `2019_week42` <dbl> 1757.5100, 59.1025, 185.8500, 1245.4900, 137.4100
## $ `2019_week43` <dbl> 1761.330, 61.645, 187.890, 1265.130, 140.730
## $ `2019_week44` <dbl> 1791.440, 63.955, 193.620, 1273.740, 143.720
## $ `2019_week45` <dbl> 1785.880, 65.035, 190.840, 1311.370, 145.960
## $ `2019_week46` <dbl> 1739.49, 66.44, 195.10, 1334.87, 149.97
## $ `2019_week47` <dbl> 1745.720, 65.445, 198.820, 1295.340, 149.590
## $ `2019_week48` <dbl> 1800.8000, 66.8125, 201.6400, 1304.9600, 151.3800
## $ `2019_week49` <dbl> 1751.6000, 67.6775, 201.0500, 1340.6200, 151.7500
## $ `2019_week50` <dbl> 1760.9399, 68.7875, 194.1100, 1347.8300, 154.5300
## $ `2019_week51` <dbl> 1786.50, 69.86, 206.30, 1349.59, 157.41
## $ `2019_week52` <dbl> 1869.80, 72.45, 208.10, 1351.89, 158.96
## $ `2020_week1` <dbl> 1874.9700, 74.3575, 208.6700, 1360.6600, 158.6200
## $ `2020_week2` <dbl> 1883.1600, 77.5825, 218.0600, 1429.7300, 161.3400
## $ `2020_week3` <dbl> 1864.7200, 79.6825, 222.1400, 1480.3900, 167.1000
## $ `2020_week4` <dbl> 1861.6400, 79.5775, 217.9400, 1466.7100, 165.0400
## $ `2020_week5` <dbl> 2008.7200, 77.3775, 201.9100, 1434.2300, 170.2300
## $ `2020_week6` <dbl> 2079.2800, 80.0075, 212.3300, 1479.2300, 183.8900
## $ `2020_week7` <dbl> 2134.8701, 81.2375, 214.1800, 1520.7400, 185.3500
## $ `2020_week8` <dbl> 2095.9700, 78.2625, 210.1800, 1485.1100, 178.5900
## $ `2020_week9` <dbl> 1883.75, 68.34, 192.47, 1339.33, 162.01
## $ `2020_week10` <dbl> 1901.0900, 72.2575, 181.0900, 1298.4100, 161.5700
## $ `2020_week11` <dbl> 1785.0000, 69.4925, 170.2800, 1219.7300, 158.8300
## $ `2020_week12` <dbl> 1846.09, 57.31, 149.73, 1072.32, 137.35
## $ `2020_week13` <dbl> 1900.100, 61.935, 156.790, 1110.710, 149.700
## $ `2020_week14` <dbl> 1906.5900, 60.3525, 154.1800, 1097.8800, 153.8300
## $ `2020_week15` <dbl> 2042.7600, 66.9975, 175.1900, 1211.4500, 165.1400
## $ `2020_week16` <dbl> 2375.00, 70.70, 179.24, 1283.25, 178.60
## $ `2020_week17` <dbl> 2410.2200, 70.7425, 190.0700, 1279.3101, 174.5500
## $ `2020_week18` <dbl> 2286.0400, 72.2675, 202.2700, 1320.6100, 174.5700
## $ `2020_week19` <dbl> 2379.6101, 77.5325, 212.3500, 1388.3700, 184.6800
## $ `2020_week20` <dbl> 2409.7800, 76.9275, 210.8800, 1373.1899, 183.1600
## $ `2020_week21` <dbl> 2436.8799, 79.7225, 234.9100, 1410.4200, 183.5100
## $ `2020_week22` <dbl> 2442.370, 79.485, 225.090, 1428.920, 183.250
## $ `2020_week23` <dbl> 2483.000, 82.875, 230.770, 1438.390, 187.200
## $ `2020_week24` <dbl> 2545.02, 84.70, 228.58, 1413.18, 187.74
## $ `2020_week25` <dbl> 2675.01, 87.43, 238.79, 1431.72, 195.15
## $ `2020_week26` <dbl> 2692.8701, 88.4075, 216.0800, 1359.9000, 196.3300
## $ `2020_week27` <dbl> 2890.3000, 91.0275, 233.4200, 1464.7000, 206.2600
## $ `2020_week28` <dbl> 3200.00, 95.92, 245.07, 1541.74, 213.67
## $ `2020_week29` <dbl> 2961.9700, 96.3275, 242.0300, 1515.5500, 202.8800
## $ `2020_week30` <dbl> 3008.910, 92.615, 230.710, 1511.870, 201.300
## $ `2020_week31` <dbl> 3164.68, 106.26, 253.67, 1482.96, 205.01
## $ `2020_week32` <dbl> 3167.4600, 111.1125, 268.4400, 1494.4900, 212.4800
## $ `2020_week33` <dbl> 3148.0200, 114.9075, 261.2400, 1507.7300, 208.9000
## $ `2020_week34` <dbl> 3284.72, 124.37, 267.01, 1580.42, 213.02
## $ `2020_week35` <dbl> 3401.8000, 124.8075, 293.6600, 1644.4100, 228.9100
## $ `2020_week36` <dbl> 3294.62, 120.96, 282.73, 1591.04, 214.25
## $ `2020_week37` <dbl> 3116.22, 112.00, 266.61, 1520.72, 204.03
## $ `2020_week38` <dbl> 2954.91, 106.84, 252.53, 1459.99, 200.39
## $ `2020_week39` <dbl> 3095.13, 112.28, 254.82, 1444.96, 207.82
## $ `2020_week40` <dbl> 3125.00, 113.02, 259.94, 1458.42, 206.19
## $ `2020_week41` <dbl> 3286.65, 116.97, 264.45, 1515.22, 215.81
## $ `2020_week42` <dbl> 3272.71, 119.02, 265.93, 1573.01, 219.66
## $ `2020_week43` <dbl> 3204.40, 115.04, 284.79, 1641.00, 216.23
## $ `2020_week44` <dbl> 3036.15, 108.86, 263.11, 1621.01, 202.47
## $ `2020_week45` <dbl> 3311.37, 118.69, 293.41, 1761.75, 223.72
## $ `2020_week46` <dbl> 3128.81, 119.26, 276.95, 1777.02, 216.51
## $ `2020_week47` <dbl> 3099.40, 117.34, 269.70, 1742.19, 210.39
## $ `2020_week48` <dbl> 3195.34, 116.59, 277.81, 1793.19, 215.23
## $ `2020_week49` <dbl> 3162.58, 122.25, 279.70, 1827.99, 214.36
## $ `2020_week50` <dbl> 3116.42, 122.41, 273.55, 1781.77, 213.26
## $ `2020_week51` <dbl> 3201.65, 126.66, 276.40, 1731.01, 218.59
## $ `2020_week52` <dbl> 3172.69, 131.97, 267.40, 1738.85, 222.75
## $ `2020_week53` <dbl> 3256.93, 132.69, 273.16, 1751.88, 222.42
The original dataset is in wide format, where each week is a separate column:
# Display the first few columns of the wide format data
stock_df
## # A tibble: 5 × 106
## company `2019_week1` `2019_week2` `2019_week3` `2019_week4` `2019_week5`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Amazon 1848. 1641. 1696. 1671. 1626.
## 2 Apple 73.4 38.1 39.2 39.4 41.6
## 3 Facebook 205. 144. 150. 149. 166.
## 4 Google 1337. 1057. 1098. 1091. 1111.
## 5 Microsoft 158. 103. 108. 107. 103.
## # ℹ 100 more variables: `2019_week6` <dbl>, `2019_week7` <dbl>,
## # `2019_week8` <dbl>, `2019_week9` <dbl>, `2019_week10` <dbl>,
## # `2019_week11` <dbl>, `2019_week12` <dbl>, `2019_week13` <dbl>,
## # `2019_week14` <dbl>, `2019_week15` <dbl>, `2019_week16` <dbl>,
## # `2019_week17` <dbl>, `2019_week18` <dbl>, `2019_week19` <dbl>,
## # `2019_week20` <dbl>, `2019_week21` <dbl>, `2019_week22` <dbl>,
## # `2019_week23` <dbl>, `2019_week24` <dbl>, `2019_week25` <dbl>, …
In this format: - Each row represents a company - Each column (except
company
) represents a specific week in 2019 - The values
are the closing stock prices
Now we’ll reshape the data from wide to long format using
pivot_longer()
:
# Reshape from wide to long format
stock_df_long <- stock_df %>%
pivot_longer(cols = !company,
names_to = c("year", "week"),
names_sep = "_week",
names_transform = list(year = as.integer, week = as.integer),
values_to = "price")
# Display the reshaped data
stock_df_long
## # A tibble: 525 × 4
## company year week price
## <chr> <int> <int> <dbl>
## 1 Amazon 2019 1 1848.
## 2 Amazon 2019 2 1641.
## 3 Amazon 2019 3 1696.
## 4 Amazon 2019 4 1671.
## 5 Amazon 2019 5 1626.
## 6 Amazon 2019 6 1588.
## 7 Amazon 2019 7 1608.
## 8 Amazon 2019 8 1632.
## 9 Amazon 2019 9 1672.
## 10 Amazon 2019 10 1621.
## # ℹ 515 more rows
The pivot_longer()
function:
cols = !company
: Selects all columns
except ‘company’ to pivotnames_to = c("year", "week")
: Splits
column names into two new columnsnames_sep = "_week"
: Uses “_week” as
the separator to split column namesnames_transform
: Converts ‘year’ and
‘week’ from character to integervalues_to = "price"
: Names the value
column as ‘price’# Summary statistics
summary(stock_df_long)
## company year week price
## Length:525 Min. :2019 Min. : 1.00 Min. : 38.07
## Class :character 1st Qu.:2019 1st Qu.:14.00 1st Qu.: 136.62
## Mode :character Median :2020 Median :27.00 Median : 212.48
## Mean :2020 Mean :26.75 Mean : 804.30
## 3rd Qu.:2020 3rd Qu.:40.00 3rd Qu.:1479.23
## Max. :2020 Max. :53.00 Max. :3401.80
# Count observations per company
stock_df_long %>%
count(company)
## # A tibble: 5 × 2
## company n
## <chr> <int>
## 1 Amazon 105
## 2 Apple 105
## 3 Facebook 105
## 4 Google 105
## 5 Microsoft 105
Let’s visualize the stock prices over time:
# Plot stock prices over weeks
ggplot(stock_df_long, aes(x = week, y = price, color = company)) +
geom_line(size = 1) +
labs(title = "Weekly Stock Prices in 2019",
subtitle = "Five Major US Tech Companies",
x = "Week Number",
y = "Stock Price (USD)",
color = "Company") +
theme_minimal() +
theme(legend.position = "bottom")