Import your data

# excel file
data <- read_excel("myData_charts.xlsx")
data
## # A tibble: 45,090 × 10
##    stock_symbol date                 open  high   low close adj_close    volume
##    <chr>        <dttm>              <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL         2010-01-04 00:00:00  7.62  7.66  7.58  7.64      6.52 493729600
##  2 AAPL         2010-01-05 00:00:00  7.66  7.70  7.62  7.66      6.53 601904800
##  3 AAPL         2010-01-06 00:00:00  7.66  7.69  7.53  7.53      6.42 552160000
##  4 AAPL         2010-01-07 00:00:00  7.56  7.57  7.47  7.52      6.41 477131200
##  5 AAPL         2010-01-08 00:00:00  7.51  7.57  7.47  7.57      6.45 447610800
##  6 AAPL         2010-01-11 00:00:00  7.6   7.61  7.44  7.50      6.40 462229600
##  7 AAPL         2010-01-12 00:00:00  7.47  7.49  7.37  7.42      6.32 594459600
##  8 AAPL         2010-01-13 00:00:00  7.42  7.53  7.29  7.52      6.41 605892000
##  9 AAPL         2010-01-14 00:00:00  7.50  7.52  7.46  7.48      6.38 432894000
## 10 AAPL         2010-01-15 00:00:00  7.53  7.56  7.35  7.35      6.27 594067600
## # ℹ 45,080 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>
# Selecting stocks and their closing prices
selected_stocks <- c("AAPL", "ADBE", "AMZN", "CRM", "CSCO", "GOOGL", "IBM", "INTC", "META", "MSFT", "NFLX", "NVDA", "ORCL", "TSLA")

# Creating a smaller dataset of just looking at AAPL, META, and TESLA stock close prices with corresponding dates
data_small <- data %>%
    
    select(stock_symbol, date, close) %>%
    filter(stock_symbol %in% c("AAPL", "META", "TSLA"))

data_small
## # A tibble: 9,107 × 3
##    stock_symbol date                close
##    <chr>        <dttm>              <dbl>
##  1 AAPL         2010-01-04 00:00:00  7.64
##  2 AAPL         2010-01-05 00:00:00  7.66
##  3 AAPL         2010-01-06 00:00:00  7.53
##  4 AAPL         2010-01-07 00:00:00  7.52
##  5 AAPL         2010-01-08 00:00:00  7.57
##  6 AAPL         2010-01-11 00:00:00  7.50
##  7 AAPL         2010-01-12 00:00:00  7.42
##  8 AAPL         2010-01-13 00:00:00  7.52
##  9 AAPL         2010-01-14 00:00:00  7.48
## 10 AAPL         2010-01-15 00:00:00  7.35
## # ℹ 9,097 more rows

Pivoting

long to wide form

data_wide <- data_small %>%
    
    pivot_wider(names_from  = stock_symbol, 
                values_from = close)

data_wide
## # A tibble: 3,287 × 4
##    date                 AAPL  META  TSLA
##    <dttm>              <dbl> <dbl> <dbl>
##  1 2010-01-04 00:00:00  7.64    NA    NA
##  2 2010-01-05 00:00:00  7.66    NA    NA
##  3 2010-01-06 00:00:00  7.53    NA    NA
##  4 2010-01-07 00:00:00  7.52    NA    NA
##  5 2010-01-08 00:00:00  7.57    NA    NA
##  6 2010-01-11 00:00:00  7.50    NA    NA
##  7 2010-01-12 00:00:00  7.42    NA    NA
##  8 2010-01-13 00:00:00  7.52    NA    NA
##  9 2010-01-14 00:00:00  7.48    NA    NA
## 10 2010-01-15 00:00:00  7.35    NA    NA
## # ℹ 3,277 more rows

wide to long form

data_long <- data_wide %>%
    
    pivot_longer(cols = -date,
                 names_to = "stock_symbol", 
                 values_to = "close") 

data_long
## # A tibble: 9,861 × 3
##    date                stock_symbol close
##    <dttm>              <chr>        <dbl>
##  1 2010-01-04 00:00:00 AAPL          7.64
##  2 2010-01-04 00:00:00 META         NA   
##  3 2010-01-04 00:00:00 TSLA         NA   
##  4 2010-01-05 00:00:00 AAPL          7.66
##  5 2010-01-05 00:00:00 META         NA   
##  6 2010-01-05 00:00:00 TSLA         NA   
##  7 2010-01-06 00:00:00 AAPL          7.53
##  8 2010-01-06 00:00:00 META         NA   
##  9 2010-01-06 00:00:00 TSLA         NA   
## 10 2010-01-07 00:00:00 AAPL          7.52
## # ℹ 9,851 more rows
unique_values <- data_wide %>%
    summarize_all(~ length(unique(.)))

unique_values
## # A tibble: 1 × 4
##    date  AAPL  META  TSLA
##   <int> <int> <int> <int>
## 1  3287  3133  2542  2956

Separating and Uniting

Separate a column

data_year <- data_small %>%
    separate(date, into = c("year", "month", "day"), sep = "-") %>%
    select(-month, -day)

data_year
## # A tibble: 9,107 × 3
##    stock_symbol year  close
##    <chr>        <chr> <dbl>
##  1 AAPL         2010   7.64
##  2 AAPL         2010   7.66
##  3 AAPL         2010   7.53
##  4 AAPL         2010   7.52
##  5 AAPL         2010   7.57
##  6 AAPL         2010   7.50
##  7 AAPL         2010   7.42
##  8 AAPL         2010   7.52
##  9 AAPL         2010   7.48
## 10 AAPL         2010   7.35
## # ℹ 9,097 more rows
data_year$year <- as.numeric(data_year$year)

data_year_summary <- data_year %>%
    group_by(stock_symbol, year) %>%
    summarize(first_close = first(close), last_close = last(close), .groups = "drop")

data_year_summary
## # A tibble: 38 × 4
##    stock_symbol  year first_close last_close
##    <chr>        <dbl>       <dbl>      <dbl>
##  1 AAPL          2010        7.64       11.5
##  2 AAPL          2011       11.8        14.5
##  3 AAPL          2012       14.7        19.0
##  4 AAPL          2013       19.6        20.0
##  5 AAPL          2014       19.8        27.6
##  6 AAPL          2015       27.3        26.3
##  7 AAPL          2016       26.3        29.0
##  8 AAPL          2017       29.0        42.3
##  9 AAPL          2018       43.1        39.4
## 10 AAPL          2019       39.5        73.4
## # ℹ 28 more rows

Unite two columns

data_united <- data_year_summary %>%
  mutate(unique_identifier = paste(stock_symbol, year, sep = "_"))

data_united <- data_united %>%
  select(unique_identifier, first_close, last_close)

data_united
## # A tibble: 38 × 3
##    unique_identifier first_close last_close
##    <chr>                   <dbl>      <dbl>
##  1 AAPL_2010                7.64       11.5
##  2 AAPL_2011               11.8        14.5
##  3 AAPL_2012               14.7        19.0
##  4 AAPL_2013               19.6        20.0
##  5 AAPL_2014               19.8        27.6
##  6 AAPL_2015               27.3        26.3
##  7 AAPL_2016               26.3        29.0
##  8 AAPL_2017               29.0        42.3
##  9 AAPL_2018               43.1        39.4
## 10 AAPL_2019               39.5        73.4
## # ℹ 28 more rows

Missing Values