# Import the stock workbook from the shared data folder, then print the
# resulting table to inspect what was read.
data <- read_excel(path = "../00_data/myData.xlsx")
data
# Transform data: one row per stock symbol holding the mean of adj_close.
# Missing adj_close values are dropped before averaging (na.rm = TRUE).
adjclose_ss <- summarise(
  group_by(data, stock_symbol),
  avg_adj_close = mean(adj_close, na.rm = TRUE)
)
adjclose_ss
## # A tibble: 14 × 2
## stock_symbol avg_adj_close
## <chr> <dbl>
## 1 AAPL 49.4
## 2 ADBE 186.
## 3 AMZN 58.9
## 4 CRM 103.
## 5 CSCO 28.6
## 6 GOOGL 49.1
## 7 IBM 113.
## 8 INTC 31.3
## 9 META 148.
## 10 MSFT 95.3
## 11 NFLX 188.
## 12 NVDA 50.3
## 13 ORCL 42.6
## 14 TSLA 58.8
# Plot: dot chart of the average adjusted close (x) for each symbol (y),
# without any reordering of the symbols.
ggplot(
  data = adjclose_ss,
  mapping = aes(x = avg_adj_close, y = stock_symbol)
) +
  geom_point()
# Same dot chart, but the symbols on the y-axis are reordered by their
# average adjusted close so the points read as a ranked list.
ggplot(
  data = adjclose_ss,
  mapping = aes(
    x = avg_adj_close,
    y = fct_reorder(stock_symbol, avg_adj_close)
  )
) +
  geom_point() +
  # Labeling: drop the y-axis title and describe the x-axis
  labs(x = "Average Adjusted Close by Stock Symbol", y = NULL)
# List each stock symbol present in the data once
distinct(data, stock_symbol)
## # A tibble: 14 × 1
## stock_symbol
## <chr>
## 1 AAPL
## 2 ADBE
## 3 AMZN
## 4 CRM
## 5 CSCO
## 6 GOOGL
## 7 IBM
## 8 INTC
## 9 META
## 10 MSFT
## 11 NFLX
## 12 NVDA
## 13 ORCL
## 14 TSLA
# Recode: relabel the "NVDA" level as "Nvidia" in a new factor column.
# fct_recode() takes pairs of the form new_level = "old_level", so the new
# label goes on the left and the existing symbol on the right.
data %>%
  mutate(symbol_rev = fct_recode(stock_symbol, "Nvidia" = "NVDA")) %>%
  select(stock_symbol, symbol_rev) %>%
  # stock_symbol still holds the original value, so match on "NVDA" to show
  # the rows whose label was changed
  filter(stock_symbol == "NVDA")
## # A tibble: 3,271 × 2
## stock_symbol symbol_rev
## <chr> <fct>
## 1 NVDA Nvidia
## 2 NVDA Nvidia
## 3 NVDA Nvidia
## 4 NVDA Nvidia
## 5 NVDA Nvidia
## 6 NVDA Nvidia
## 7 NVDA Nvidia
## 8 NVDA Nvidia
## 9 NVDA Nvidia
## 10 NVDA Nvidia
## # ℹ 3,261 more rows
# Collapse multiple levels into one: every symbol listed on the right-hand
# side is folded into the single level "Microsoft" (only MSFT is listed).
# NOTE(review): the final filter removes the MSFT rows, so the collapsed
# level never shows up in the printed result — confirm this is intended.
data %>%
  mutate(
    stock_symbol_col = fct_collapse(stock_symbol, Microsoft = "MSFT")
  ) %>%
  select(stock_symbol, stock_symbol_col) %>%
  filter(stock_symbol != "MSFT")
## # A tibble: 41,817 × 2
## stock_symbol stock_symbol_col
## <chr> <fct>
## 1 AAPL AAPL
## 2 AAPL AAPL
## 3 AAPL AAPL
## 4 AAPL AAPL
## 5 AAPL AAPL
## 6 AAPL AAPL
## 7 AAPL AAPL
## 8 AAPL AAPL
## 9 AAPL AAPL
## 10 AAPL AAPL
## # ℹ 41,807 more rows
# Lump small levels into other levels
# Start by counting the rows per symbol to see which levels are small.
count(data, stock_symbol)
## # A tibble: 14 × 2
## stock_symbol n
## <chr> <int>
## 1 AAPL 3271
## 2 ADBE 3271
## 3 AMZN 3271
## 4 CRM 3271
## 5 CSCO 3271
## 6 GOOGL 3271
## 7 IBM 3271
## 8 INTC 3271
## 9 META 2688
## 10 MSFT 3271
## 11 NFLX 3271
## 12 NVDA 3271
## 13 ORCL 3271
## 14 TSLA 3148
# Lump the least frequent symbols together with fct_lump()'s default settings,
# then list the levels that remain after lumping.
data %>%
  mutate(stock_symbol_lump = fct_lump(stock_symbol)) %>%
  distinct(stock_symbol_lump)
## # A tibble: 14 × 1
## stock_symbol_lump
## <fct>
## 1 AAPL
## 2 ADBE
## 3 AMZN
## 4 CRM
## 5 CSCO
## 6 GOOGL
## 7 IBM
## 8 INTC
## 9 Other
## 10 MSFT
## 11 NFLX
## 12 NVDA
## 13 ORCL
## 14 TSLA
No need to do anything here.