Import your data

# Read the Excel workbook at ../00_data/myData.xlsx into `data`
data <- read_excel(path = "../00_data/myData.xlsx")

Chapter 15

Create a factor

# Print the imported data; stock_symbol is still a character column here
data

Modify factor order

Unordered factor levels

# Transform data: one row per stock symbol holding the mean adjusted close
# (missing adj_close values are dropped before averaging)

adjclose_ss <- summarise(
    group_by(data, stock_symbol),
    avg_adj_close = mean(adj_close, na.rm = TRUE)
)

# Show the summary table
adjclose_ss
## # A tibble: 14 × 2
##    stock_symbol avg_adj_close
##    <chr>                <dbl>
##  1 AAPL                  49.4
##  2 ADBE                 186. 
##  3 AMZN                  58.9
##  4 CRM                  103. 
##  5 CSCO                  28.6
##  6 GOOGL                 49.1
##  7 IBM                  113. 
##  8 INTC                  31.3
##  9 META                 148. 
## 10 MSFT                  95.3
## 11 NFLX                 188. 
## 12 NVDA                  50.3
## 13 ORCL                  42.6
## 14 TSLA                  58.8
# Plot: average adjusted close per symbol, symbols in their default order

ggplot(adjclose_ss, aes(x = avg_adj_close, y = stock_symbol)) +
    geom_point()

Ordered factor levels

# Same scatter plot, but the y-axis symbols are sorted by their average
# adjusted close via fct_reorder()
ggplot(
    adjclose_ss,
    aes(
        x = avg_adj_close,
        y = fct_reorder(stock_symbol, avg_adj_close)
    )
) +
    geom_point() +
    # Drop the y-axis title and give the x-axis a descriptive one
    labs(x = "Average Adjusted Close by Stock Symbol", y = NULL)

Modify factor levels

# List each stock symbol once to see which levels are available
distinct(data, stock_symbol)
## # A tibble: 14 × 1
##    stock_symbol
##    <chr>       
##  1 AAPL        
##  2 ADBE        
##  3 AMZN        
##  4 CRM         
##  5 CSCO        
##  6 GOOGL       
##  7 IBM         
##  8 INTC        
##  9 META        
## 10 MSFT        
## 11 NFLX        
## 12 NVDA        
## 13 ORCL        
## 14 TSLA
# Recode: rename the "NVDA" level to "Nvidia".
# fct_recode() takes its pairs as "new level" = "old level"; writing them the
# other way round recodes nothing and warns about an unknown level.
data %>%
    mutate(symbol_rev = fct_recode(stock_symbol, "Nvidia" = "NVDA")) %>%
    select(stock_symbol, symbol_rev) %>%
    # Filter on the original ticker so the recoded rows are the ones displayed
    filter(stock_symbol == "NVDA")
## # A tibble: 3,271 × 2
##    stock_symbol symbol_rev
##    <chr>        <fct>     
##  1 NVDA         Nvidia    
##  2 NVDA         Nvidia    
##  3 NVDA         Nvidia    
##  4 NVDA         Nvidia    
##  5 NVDA         Nvidia    
##  6 NVDA         Nvidia    
##  7 NVDA         Nvidia    
##  8 NVDA         Nvidia    
##  9 NVDA         Nvidia    
## 10 NVDA         Nvidia    
## # ℹ 3,261 more rows
# Collapse levels: fold "MSFT" into a single level named "Microsoft".
# Note: the final filter keeps every row except MSFT, so the rows shown are
# the ones whose level was left untouched by the collapse.
data %>%
    mutate(
        stock_symbol_col = fct_collapse(stock_symbol, Microsoft = "MSFT")
    ) %>%
    select(
        stock_symbol,
        stock_symbol_col
    ) %>%
    filter(stock_symbol != "MSFT")
## # A tibble: 41,817 × 2
##    stock_symbol stock_symbol_col
##    <chr>        <fct>           
##  1 AAPL         AAPL            
##  2 AAPL         AAPL            
##  3 AAPL         AAPL            
##  4 AAPL         AAPL            
##  5 AAPL         AAPL            
##  6 AAPL         AAPL            
##  7 AAPL         AAPL            
##  8 AAPL         AAPL            
##  9 AAPL         AAPL            
## 10 AAPL         AAPL            
## # ℹ 41,807 more rows
# Lump small levels into other levels
# First, count the rows per symbol to see which levels are the smallest
count(data, stock_symbol)
## # A tibble: 14 × 2
##    stock_symbol     n
##    <chr>        <int>
##  1 AAPL          3271
##  2 ADBE          3271
##  3 AMZN          3271
##  4 CRM           3271
##  5 CSCO          3271
##  6 GOOGL         3271
##  7 IBM           3271
##  8 INTC          3271
##  9 META          2688
## 10 MSFT          3271
## 11 NFLX          3271
## 12 NVDA          3271
## 13 ORCL          3271
## 14 TSLA          3148
# Lump the least frequent symbols into "Other".
# fct_lump() is superseded; called without `n` or `prop` it dispatches to
# fct_lump_lowfreq(), so calling that directly gives the same result.
data %>%
    mutate(stock_symbol_lump = fct_lump_lowfreq(stock_symbol)) %>%
    distinct(stock_symbol_lump)
## # A tibble: 14 × 1
##    stock_symbol_lump
##    <fct>            
##  1 AAPL             
##  2 ADBE             
##  3 AMZN             
##  4 CRM              
##  5 CSCO             
##  6 GOOGL            
##  7 IBM              
##  8 INTC             
##  9 Other            
## 10 MSFT             
## 11 NFLX             
## 12 NVDA             
## 13 ORCL             
## 14 TSLA

Chapter 16

No need to do anything here.