Import your data
# excel file
data <- read_excel("myData_charts.xlsx")
data
## # A tibble: 45,090 × 10
## stock_symbol date open high low close adj_close volume
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2010-01-04 00:00:00 7.62 7.66 7.58 7.64 6.52 493729600
## 2 AAPL 2010-01-05 00:00:00 7.66 7.70 7.62 7.66 6.53 601904800
## 3 AAPL 2010-01-06 00:00:00 7.66 7.69 7.53 7.53 6.42 552160000
## 4 AAPL 2010-01-07 00:00:00 7.56 7.57 7.47 7.52 6.41 477131200
## 5 AAPL 2010-01-08 00:00:00 7.51 7.57 7.47 7.57 6.45 447610800
## 6 AAPL 2010-01-11 00:00:00 7.6 7.61 7.44 7.50 6.40 462229600
## 7 AAPL 2010-01-12 00:00:00 7.47 7.49 7.37 7.42 6.32 594459600
## 8 AAPL 2010-01-13 00:00:00 7.42 7.53 7.29 7.52 6.41 605892000
## 9 AAPL 2010-01-14 00:00:00 7.50 7.52 7.46 7.48 6.38 432894000
## 10 AAPL 2010-01-15 00:00:00 7.53 7.56 7.35 7.35 6.27 594067600
## # ℹ 45,080 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>
# Selecting stocks and their closing prices
selected_stocks <- c("AAPL", "ADBE", "AMZN", "CRM", "CSCO", "GOOGL", "IBM", "INTC", "META", "MSFT", "NFLX", "NVDA", "ORCL", "TSLA")
# Creating a smaller dataset of just looking at AAPL, META, and TESLA stock close prices with corresponding dates
data_small <- data %>%
select(stock_symbol, date, close) %>%
filter(stock_symbol %in% c("AAPL", "META", "TSLA"))
data_small
## # A tibble: 9,107 × 3
## stock_symbol date close
## <chr> <dttm> <dbl>
## 1 AAPL 2010-01-04 00:00:00 7.64
## 2 AAPL 2010-01-05 00:00:00 7.66
## 3 AAPL 2010-01-06 00:00:00 7.53
## 4 AAPL 2010-01-07 00:00:00 7.52
## 5 AAPL 2010-01-08 00:00:00 7.57
## 6 AAPL 2010-01-11 00:00:00 7.50
## 7 AAPL 2010-01-12 00:00:00 7.42
## 8 AAPL 2010-01-13 00:00:00 7.52
## 9 AAPL 2010-01-14 00:00:00 7.48
## 10 AAPL 2010-01-15 00:00:00 7.35
## # ℹ 9,097 more rows
Separating and Uniting
Separate a column
data_year <- data_small %>%
separate(date, into = c("year", "month", "day"), sep = "-") %>%
select(-month, -day)
data_year
## # A tibble: 9,107 × 3
## stock_symbol year close
## <chr> <chr> <dbl>
## 1 AAPL 2010 7.64
## 2 AAPL 2010 7.66
## 3 AAPL 2010 7.53
## 4 AAPL 2010 7.52
## 5 AAPL 2010 7.57
## 6 AAPL 2010 7.50
## 7 AAPL 2010 7.42
## 8 AAPL 2010 7.52
## 9 AAPL 2010 7.48
## 10 AAPL 2010 7.35
## # ℹ 9,097 more rows
data_year$year <- as.numeric(data_year$year)
data_year_summary <- data_year %>%
group_by(stock_symbol, year) %>%
summarize(first_close = first(close), last_close = last(close), .groups = "drop")
data_year_summary
## # A tibble: 38 × 4
## stock_symbol year first_close last_close
## <chr> <dbl> <dbl> <dbl>
## 1 AAPL 2010 7.64 11.5
## 2 AAPL 2011 11.8 14.5
## 3 AAPL 2012 14.7 19.0
## 4 AAPL 2013 19.6 20.0
## 5 AAPL 2014 19.8 27.6
## 6 AAPL 2015 27.3 26.3
## 7 AAPL 2016 26.3 29.0
## 8 AAPL 2017 29.0 42.3
## 9 AAPL 2018 43.1 39.4
## 10 AAPL 2019 39.5 73.4
## # ℹ 28 more rows
Unite two columns
data_united <- data_year_summary %>%
mutate(unique_identifier = paste(stock_symbol, year, sep = "_"))
data_united <- data_united %>%
select(unique_identifier, first_close, last_close)
data_united
## # A tibble: 38 × 3
## unique_identifier first_close last_close
## <chr> <dbl> <dbl>
## 1 AAPL_2010 7.64 11.5
## 2 AAPL_2011 11.8 14.5
## 3 AAPL_2012 14.7 19.0
## 4 AAPL_2013 19.6 20.0
## 5 AAPL_2014 19.8 27.6
## 6 AAPL_2015 27.3 26.3
## 7 AAPL_2016 26.3 29.0
## 8 AAPL_2017 29.0 42.3
## 9 AAPL_2018 43.1 39.4
## 10 AAPL_2019 39.5 73.4
## # ℹ 28 more rows
Missing Values