Import your data

# Read the Excel workbook into `data` and print a preview of it.
data <- read_excel(path = "myData_charts.xlsx")
data
## # A tibble: 45,090 × 10
##    stock_symbol date                 open  high   low close adj_close    volume
##    <chr>        <dttm>              <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL         2010-01-04 00:00:00  7.62  7.66  7.58  7.64      6.52 493729600
##  2 AAPL         2010-01-05 00:00:00  7.66  7.70  7.62  7.66      6.53 601904800
##  3 AAPL         2010-01-06 00:00:00  7.66  7.69  7.53  7.53      6.42 552160000
##  4 AAPL         2010-01-07 00:00:00  7.56  7.57  7.47  7.52      6.41 477131200
##  5 AAPL         2010-01-08 00:00:00  7.51  7.57  7.47  7.57      6.45 447610800
##  6 AAPL         2010-01-11 00:00:00  7.6   7.61  7.44  7.50      6.40 462229600
##  7 AAPL         2010-01-12 00:00:00  7.47  7.49  7.37  7.42      6.32 594459600
##  8 AAPL         2010-01-13 00:00:00  7.42  7.53  7.29  7.52      6.41 605892000
##  9 AAPL         2010-01-14 00:00:00  7.50  7.52  7.46  7.48      6.38 432894000
## 10 AAPL         2010-01-15 00:00:00  7.53  7.56  7.35  7.35      6.27 594067600
## # ℹ 45,080 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>
# Ticker symbols of the 14 stocks of interest.
# NOTE(review): this vector is not referenced in the visible part of the
# file -- confirm whether it is still needed.
selected_stocks <- c(
  "AAPL", "ADBE", "AMZN", "CRM", "CSCO", "GOOGL", "IBM",
  "INTC", "META", "MSFT", "NFLX", "NVDA", "ORCL", "TSLA"
)

Chapter 15

Create a factor

Modify factor order

Make two bar charts here: one before ordering the factor levels and another after.

Unordered factor levels

# Mean closing price per stock symbol.
# Rows with a missing `close` are dropped before grouping, so the mean is
# taken over complete values only and needs no `na.rm`.
close_by_stocksymbol <- data %>%
  filter(!is.na(close)) %>%
  group_by(stock_symbol) %>%
  summarise(avg_close = mean(close))

close_by_stocksymbol
## # A tibble: 14 × 2
##    stock_symbol avg_close
##    <chr>            <dbl>
##  1 AAPL              51.3
##  2 ADBE             186. 
##  3 AMZN              58.9
##  4 CRM              103. 
##  5 CSCO              33.5
##  6 GOOGL             49.1
##  7 IBM              148. 
##  8 INTC              36.5
##  9 META             148. 
## 10 MSFT             100. 
## 11 NFLX             188. 
## 12 NVDA              50.6
## 13 ORCL              46.3
## 14 TSLA              58.8
# Horizontal bar chart of the mean closing price per stock. The y axis maps
# `stock_symbol` directly, with no reordering by value.
ggplot(close_by_stocksymbol, aes(x = avg_close, y = stock_symbol)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Mean Closing Price by Stock (Unordered)",
    x = "Mean Closing Price",
    y = "Stock Symbol"
  ) +
  theme_minimal()

Ordered factor levels

# Same bar chart, but the y axis is reordered by `avg_close` via
# fct_reorder(), and each bar is labelled with its value rounded to 2 digits.
ggplot(
  close_by_stocksymbol,
  aes(x = avg_close, y = fct_reorder(stock_symbol, avg_close))
) +
  geom_col(fill = "skyblue") +
  # Negative hjust places the label just past the end of the bar.
  geom_text(aes(label = round(avg_close, 2)), hjust = -0.1, size = 3) +
  labs(
    title = "Mean Closing Price by Stock (Ordered)",
    x = "Mean Closing Price",
    y = "Stock Symbol"
  ) +
  theme_minimal()

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): map three ticker symbols to company names in a new
# `stock_name` column, then keep only the AAPL rows to show the result.
data_recoded <- data %>%
  mutate(
    stock_name = fct_recode(
      stock_symbol,
      Apple = "AAPL",
      Adobe = "ADBE",
      Amazon = "AMZN"
    )
  ) %>%
  select(stock_symbol, stock_name) %>%
  filter(stock_symbol == "AAPL")

data_recoded
## # A tibble: 3,271 × 2
##    stock_symbol stock_name
##    <chr>        <fct>     
##  1 AAPL         Apple     
##  2 AAPL         Apple     
##  3 AAPL         Apple     
##  4 AAPL         Apple     
##  5 AAPL         Apple     
##  6 AAPL         Apple     
##  7 AAPL         Apple     
##  8 AAPL         Apple     
##  9 AAPL         Apple     
## 10 AAPL         Apple     
## # ℹ 3,261 more rows
  • fct_collapse
# fct_collapse(): merge the ticker symbols into two groups in a new
# `stock_symbol_col` column, then keep the rows that are not "First Half".
data_collapsed <- data %>%
  mutate(
    stock_symbol_col = fct_collapse(
      stock_symbol,
      "First Half" = c(
        "AAPL", "ADBE", "AMZN", "CRM", "CSCO", "MSFT", "ORCL"
      ),
      "Second Half" = c(
        "GOOGL", "IBM", "INTC", "META", "NFLX", "NVDA", "TSLA"
      )
    )
  ) %>%
  select(stock_symbol, stock_symbol_col) %>%
  filter(stock_symbol_col != "First Half")

data_collapsed
## # A tibble: 22,191 × 2
##    stock_symbol stock_symbol_col
##    <chr>        <fct>           
##  1 GOOGL        Second Half     
##  2 GOOGL        Second Half     
##  3 GOOGL        Second Half     
##  4 GOOGL        Second Half     
##  5 GOOGL        Second Half     
##  6 GOOGL        Second Half     
##  7 GOOGL        Second Half     
##  8 GOOGL        Second Half     
##  9 GOOGL        Second Half     
## 10 GOOGL        Second Half     
## # ℹ 22,181 more rows
  • fct_lump
# One row per non-missing stock symbol, with its row count in `n`.
no_NA_data <- data %>%
  filter(!is.na(stock_symbol)) %>%
  count(stock_symbol)

# fct_lump(): applied to the counted table, then reduced to distinct levels.
# NOTE(review): the printed result lists all 14 symbols and no lumped level,
# presumably because every symbol occurs exactly once after count(). Confirm
# whether lumping on the row-level data (or weighting by `n`) was intended.
data_lumped <- no_NA_data %>%
  mutate(stock_lump = fct_lump(stock_symbol)) %>%
  distinct(stock_lump)

data_lumped
## # A tibble: 14 × 1
##    stock_lump
##    <fct>     
##  1 AAPL      
##  2 ADBE      
##  3 AMZN      
##  4 CRM       
##  5 CSCO      
##  6 GOOGL     
##  7 IBM       
##  8 INTC      
##  9 META      
## 10 MSFT      
## 11 NFLX      
## 12 NVDA      
## 13 ORCL      
## 14 TSLA

Chapter 16

No need to do anything here.