# Load the stock workbook (path is relative to the working directory) and
# preview the resulting table.
data <- read_excel(path = "myData_charts.xlsx")
print(data)
## # A tibble: 45,090 × 10
## stock_symbol date open high low close adj_close volume
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2010-01-04 00:00:00 7.62 7.66 7.58 7.64 6.52 493729600
## 2 AAPL 2010-01-05 00:00:00 7.66 7.70 7.62 7.66 6.53 601904800
## 3 AAPL 2010-01-06 00:00:00 7.66 7.69 7.53 7.53 6.42 552160000
## 4 AAPL 2010-01-07 00:00:00 7.56 7.57 7.47 7.52 6.41 477131200
## 5 AAPL 2010-01-08 00:00:00 7.51 7.57 7.47 7.57 6.45 447610800
## 6 AAPL 2010-01-11 00:00:00 7.6 7.61 7.44 7.50 6.40 462229600
## 7 AAPL 2010-01-12 00:00:00 7.47 7.49 7.37 7.42 6.32 594459600
## 8 AAPL 2010-01-13 00:00:00 7.42 7.53 7.29 7.52 6.41 605892000
## 9 AAPL 2010-01-14 00:00:00 7.50 7.52 7.46 7.48 6.38 432894000
## 10 AAPL 2010-01-15 00:00:00 7.53 7.56 7.35 7.35 6.27 594067600
## # ℹ 45,080 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>
# Ticker symbols of the stocks of interest.
# NOTE(review): this vector is not referenced by any code visible in this
# section - confirm whether a later step uses it.
selected_stocks <- c(
  "AAPL", "ADBE", "AMZN", "CRM", "CSCO", "GOOGL", "IBM",
  "INTC", "META", "MSFT", "NFLX", "NVDA", "ORCL", "TSLA"
)
Make two bar charts here: one before ordering the factor levels and another after.
Unordered factor levels
# Summarise the price history to one row per ticker: the mean of `close`.
# Rows with a missing `close` are dropped first, so a ticker that has no
# closing prices at all does not appear in the summary.
close_by_stocksymbol <- data %>%
  filter(!is.na(close)) %>%
  group_by(stock_symbol) %>%
  summarise(avg_close = mean(close, na.rm = TRUE))

print(close_by_stocksymbol)
## # A tibble: 14 × 2
## stock_symbol avg_close
## <chr> <dbl>
## 1 AAPL 51.3
## 2 ADBE 186.
## 3 AMZN 58.9
## 4 CRM 103.
## 5 CSCO 33.5
## 6 GOOGL 49.1
## 7 IBM 148.
## 8 INTC 36.5
## 9 META 148.
## 10 MSFT 100.
## 11 NFLX 188.
## 12 NVDA 50.6
## 13 ORCL 46.3
## 14 TSLA 58.8
# Horizontal bar chart of the mean closing price per ticker. `stock_symbol`
# is mapped as-is, so the bars are not sorted by value.
ggplot(close_by_stocksymbol, aes(x = avg_close, y = stock_symbol)) +
  # geom_col() draws bars whose length is the value itself
  geom_col(fill = "skyblue") +
  labs(
    title = "Mean Closing Price by Stock (Unordered)",
    x = "Mean Closing Price",
    y = "Stock Symbol"
  ) +
  theme_minimal()
Ordered factor levels
# Same chart as the unordered version, but the tickers are first turned into
# a factor whose levels are sorted by `avg_close`, so the bars appear in
# value order. Each bar is also labelled with its value rounded to 2 decimals.
close_by_stocksymbol %>%
  mutate(stock_symbol = fct_reorder(stock_symbol, avg_close)) %>%
  ggplot(aes(x = avg_close, y = stock_symbol)) +
  geom_col(fill = "skyblue") +
  # negative hjust nudges the label just past the end of its bar
  geom_text(aes(label = round(avg_close, 2)), hjust = -0.1, size = 3) +
  labs(
    title = "Mean Closing Price by Stock (Ordered)",
    x = "Mean Closing Price",
    y = "Stock Symbol"
  ) +
  theme_minimal()
Show examples of three functions: fct_recode(), fct_collapse(), and fct_lump().
# fct_recode() example: derive `stock_name`, a factor in which three tickers
# are renamed to company names, then keep only the AAPL rows to show the
# symbol next to its recoded name.
data_recoded <- data %>%
  select(stock_symbol) %>%
  mutate(
    stock_name = fct_recode(
      stock_symbol,
      "Apple" = "AAPL",
      "Adobe" = "ADBE",
      "Amazon" = "AMZN"
    )
  ) %>%
  # filter last so the factor is built from every symbol in the data
  filter(stock_symbol == "AAPL")

print(data_recoded)
## # A tibble: 3,271 × 2
## stock_symbol stock_name
## <chr> <fct>
## 1 AAPL Apple
## 2 AAPL Apple
## 3 AAPL Apple
## 4 AAPL Apple
## 5 AAPL Apple
## 6 AAPL Apple
## 7 AAPL Apple
## 8 AAPL Apple
## 9 AAPL Apple
## 10 AAPL Apple
## # ℹ 3,261 more rows
# fct_collapse() example: merge the 14 tickers into two groups,
# "First Half" and "Second Half", then drop the "First Half" rows so the
# preview shows the symbols that ended up in the other group.
data_collapsed <- data %>%
  select(stock_symbol) %>%
  mutate(
    stock_symbol_col = fct_collapse(
      stock_symbol,
      "First Half" = c("AAPL", "ADBE", "AMZN", "CRM", "CSCO", "MSFT", "ORCL"),
      "Second Half" = c("GOOGL", "IBM", "INTC", "META", "NFLX", "NVDA", "TSLA")
    )
  ) %>%
  filter(stock_symbol_col != "First Half")

print(data_collapsed)
## # A tibble: 22,191 × 2
## stock_symbol stock_symbol_col
## <chr> <fct>
## 1 GOOGL Second Half
## 2 GOOGL Second Half
## 3 GOOGL Second Half
## 4 GOOGL Second Half
## 5 GOOGL Second Half
## 6 GOOGL Second Half
## 7 GOOGL Second Half
## 8 GOOGL Second Half
## 9 GOOGL Second Half
## 10 GOOGL Second Half
## # ℹ 22,181 more rows
# fct_lump() example: lump the least frequent tickers into "Other".
#
# `no_NA_data` holds one row per ticker with its row count in `n` (rows with
# a missing `stock_symbol` are removed before counting).
no_NA_data <- data %>%
  filter(!is.na(stock_symbol)) %>%
  count(stock_symbol)

# Because the table is already aggregated, every ticker occurs exactly once,
# so an unweighted fct_lump() sees all levels as equally frequent and lumps
# nothing. Passing the counts as weights (`w = n`) makes the lumping decision
# use the real number of rows per ticker.
# NOTE(review): with no `n`/`prop` argument fct_lump() only lumps levels whose
# combined weight stays below the smallest kept level, so whether an "Other"
# level appears depends on the counts in the data. The tibble printed right
# after this chunk in the rendered output came from the unweighted call and
# should be regenerated.
data_lumped <- no_NA_data %>%
  mutate(stock_lump = fct_lump(stock_symbol, w = n)) %>%
  distinct(stock_lump)
data_lumped
## # A tibble: 14 × 1
## stock_lump
## <fct>
## 1 AAPL
## 2 ADBE
## 3 AMZN
## 4 CRM
## 5 CSCO
## 6 GOOGL
## 7 IBM
## 8 INTC
## 9 META
## 10 MSFT
## 11 NFLX
## 12 NVDA
## 13 ORCL
## 14 TSLA
No need to do anything here.