Import data

Shows my whole dataset

# excel file
data <- read_excel("myData_charts.xlsx")
str(data)
## tibble [45,090 × 10] (S3: tbl_df/tbl/data.frame)
##  $ stock_symbol: chr [1:45090] "AAPL" "AAPL" "AAPL" "AAPL" ...
##  $ date        : POSIXct[1:45090], format: "2010-01-04" "2010-01-05" ...
##  $ open        : num [1:45090] 7.62 7.66 7.66 7.56 7.51 ...
##  $ high        : num [1:45090] 7.66 7.7 7.69 7.57 7.57 ...
##  $ low         : num [1:45090] 7.58 7.62 7.53 7.47 7.47 ...
##  $ close       : num [1:45090] 7.64 7.66 7.53 7.52 7.57 ...
##  $ adj_close   : num [1:45090] 6.52 6.53 6.42 6.41 6.45 ...
##  $ volume      : num [1:45090] 4.94e+08 6.02e+08 5.52e+08 4.77e+08 4.48e+08 ...
##  $ Column1     : logi [1:45090] NA NA NA NA NA NA ...
##  $ HPR         : num [1:45090] 0.00272 -0.00103 -0.01591 -0.00553 0.00799 ...

Apply the following dplyr verbs to your data

Filter rows

Shows only the rows from the Apple (AAPL) stock information

filtered_data <- data %>%
    filter(stock_symbol == "AAPL")

filtered_data
## # A tibble: 3,271 × 10
##    stock_symbol date                 open  high   low close adj_close    volume
##    <chr>        <dttm>              <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL         2010-01-04 00:00:00  7.62  7.66  7.58  7.64      6.52 493729600
##  2 AAPL         2010-01-05 00:00:00  7.66  7.70  7.62  7.66      6.53 601904800
##  3 AAPL         2010-01-06 00:00:00  7.66  7.69  7.53  7.53      6.42 552160000
##  4 AAPL         2010-01-07 00:00:00  7.56  7.57  7.47  7.52      6.41 477131200
##  5 AAPL         2010-01-08 00:00:00  7.51  7.57  7.47  7.57      6.45 447610800
##  6 AAPL         2010-01-11 00:00:00  7.6   7.61  7.44  7.50      6.40 462229600
##  7 AAPL         2010-01-12 00:00:00  7.47  7.49  7.37  7.42      6.32 594459600
##  8 AAPL         2010-01-13 00:00:00  7.42  7.53  7.29  7.52      6.41 605892000
##  9 AAPL         2010-01-14 00:00:00  7.50  7.52  7.46  7.48      6.38 432894000
## 10 AAPL         2010-01-15 00:00:00  7.53  7.56  7.35  7.35      6.27 594067600
## # ℹ 3,261 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>

Arrange rows

Shows from highest to lowest the HPR (Holding Period Return) of Apply stock

arranged_data <- filtered_data %>%
    arrange(desc(HPR))

arranged_data
## # A tibble: 3,271 × 10
##    stock_symbol date                 open  high   low close adj_close     volume
##    <chr>        <dttm>              <dbl> <dbl> <dbl> <dbl>     <dbl>      <dbl>
##  1 AAPL         2015-08-24 00:00:00  23.7  27.2  23    25.8      23.5  648825200
##  2 AAPL         2022-02-24 00:00:00 153.  163.  152   163.      162.   141147500
##  3 AAPL         2020-02-28 00:00:00  64.3  69.6  64.1  68.3      67.1  426510000
##  4 AAPL         2018-12-26 00:00:00  37.1  39.3  36.7  39.3      37.9  234330000
##  5 AAPL         2022-10-13 00:00:00 135.  144.  134.  143.      143.   113224000
##  6 AAPL         2020-03-02 00:00:00  70.6  75.4  69.4  74.7      73.4  341397200
##  7 AAPL         2012-04-17 00:00:00  20.7  21.8  20.4  21.8      18.6 1025528000
##  8 AAPL         2020-09-21 00:00:00 105.  110.  103.  110.      109.   195713800
##  9 AAPL         2018-02-06 00:00:00  38.7  40.9  38.5  40.8      38.8  272975200
## 10 AAPL         2022-10-28 00:00:00 148.  158.  148.  156.      155.   164762400
## # ℹ 3,261 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>

Select columns

Only shows apple stock’s closing value and date of that closing value

selected_data <- filtered_data %>%
    select(stock_symbol, date, close)

selected_data
## # A tibble: 3,271 × 3
##    stock_symbol date                close
##    <chr>        <dttm>              <dbl>
##  1 AAPL         2010-01-04 00:00:00  7.64
##  2 AAPL         2010-01-05 00:00:00  7.66
##  3 AAPL         2010-01-06 00:00:00  7.53
##  4 AAPL         2010-01-07 00:00:00  7.52
##  5 AAPL         2010-01-08 00:00:00  7.57
##  6 AAPL         2010-01-11 00:00:00  7.50
##  7 AAPL         2010-01-12 00:00:00  7.42
##  8 AAPL         2010-01-13 00:00:00  7.52
##  9 AAPL         2010-01-14 00:00:00  7.48
## 10 AAPL         2010-01-15 00:00:00  7.35
## # ℹ 3,261 more rows

Add columns

Adds a new column to my whole dataset, daily_return, which is a percentage of the daily return

new_data <- data %>%
    mutate(daily_return = (close - open) / open)

new_data
## # A tibble: 45,090 × 11
##    stock_symbol date                 open  high   low close adj_close    volume
##    <chr>        <dttm>              <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL         2010-01-04 00:00:00  7.62  7.66  7.58  7.64      6.52 493729600
##  2 AAPL         2010-01-05 00:00:00  7.66  7.70  7.62  7.66      6.53 601904800
##  3 AAPL         2010-01-06 00:00:00  7.66  7.69  7.53  7.53      6.42 552160000
##  4 AAPL         2010-01-07 00:00:00  7.56  7.57  7.47  7.52      6.41 477131200
##  5 AAPL         2010-01-08 00:00:00  7.51  7.57  7.47  7.57      6.45 447610800
##  6 AAPL         2010-01-11 00:00:00  7.6   7.61  7.44  7.50      6.40 462229600
##  7 AAPL         2010-01-12 00:00:00  7.47  7.49  7.37  7.42      6.32 594459600
##  8 AAPL         2010-01-13 00:00:00  7.42  7.53  7.29  7.52      6.41 605892000
##  9 AAPL         2010-01-14 00:00:00  7.50  7.52  7.46  7.48      6.38 432894000
## 10 AAPL         2010-01-15 00:00:00  7.53  7.56  7.35  7.35      6.27 594067600
## # ℹ 45,080 more rows
## # ℹ 3 more variables: Column1 <lgl>, HPR <dbl>, daily_return <dbl>

Summarize by groups

Shows the average daily return of all 14 stocks based on the previously calculated daily return column

summary_data <- new_data %>%
    group_by(stock_symbol) %>%
    summarise(avg_daily_return = mean(daily_return, na.rm = TRUE))

summary_data
## # A tibble: 15 × 2
##    stock_symbol avg_daily_return
##    <chr>                   <dbl>
##  1 AAPL               0.000270  
##  2 ADBE               0.000549  
##  3 AMZN               0.000103  
##  4 CRM                0.000317  
##  5 CSCO               0.000308  
##  6 GOOGL             -0.0000358 
##  7 IBM                0.000364  
##  8 INTC               0.000415  
##  9 META               0.000163  
## 10 MSFT               0.000390  
## 11 NFLX               0.000899  
## 12 NVDA               0.000375  
## 13 ORCL               0.000545  
## 14 TSLA               0.00000684
## 15 <NA>             NaN

Data Ordered from Highest to Lowest

Order the stocks from highest to lowest based on their daily average return

ordered_data <- summary_data %>%
    arrange(desc(avg_daily_return))

ordered_data
## # A tibble: 15 × 2
##    stock_symbol avg_daily_return
##    <chr>                   <dbl>
##  1 NFLX               0.000899  
##  2 ADBE               0.000549  
##  3 ORCL               0.000545  
##  4 INTC               0.000415  
##  5 MSFT               0.000390  
##  6 NVDA               0.000375  
##  7 IBM                0.000364  
##  8 CRM                0.000317  
##  9 CSCO               0.000308  
## 10 AAPL               0.000270  
## 11 META               0.000163  
## 12 AMZN               0.000103  
## 13 TSLA               0.00000684
## 14 GOOGL             -0.0000358 
## 15 <NA>             NaN

My Own Playground

data$date <- as.Date(data$date)

apple_data <- data %>%
    filter(stock_symbol == "AAPL")

summary_stats <- apple_data %>%
    summarise(mean_open = mean(open), 
              mean_close = mean(close), 
              mean_volume = mean(volume), 
              min_date = min(date), 
              max_date = max(date)
    )

summary_stats
## # A tibble: 1 × 5
##   mean_open mean_close mean_volume min_date   max_date  
##       <dbl>      <dbl>       <dbl> <date>     <date>    
## 1      51.3       51.3  256325536. 2010-01-04 2022-12-29
ggplot(apple_data, aes(x = date, y = close)) + 
    geom_line(color = "blue") +
    labs(title = "Apple (AAPL) Stock Closing Prices", 
         x = "Date", 
         y = "Closing Price") + 
    theme_minimal()

ggplot(apple_data, aes(x = (close-open) / open)) +
    geom_histogram(binwidth = 0.02, fill = "skyblue", color = "black") +
    labs(title = "Distribution of Daily Returns for Apple (AAPL) Stock",
         x = "Daily Return", 
         y = "Frequency") +
    theme_minimal()

apple_data <- apple_data %>%
    mutate(daily_return = (close - open) / open)

top_return_day <- apple_data %>%
    filter(daily_return == max(daily_return)) %>%
    select(date, daily_return)

bottom_return_day <- apple_data %>%
    filter(daily_return == min(daily_return)) %>%
    select(date, daily_return)

top_return_day
## # A tibble: 1 × 2
##   date       daily_return
##   <date>            <dbl>
## 1 2015-08-24       0.0870
bottom_return_day
## # A tibble: 1 × 2
##   date       daily_return
##   <date>            <dbl>
## 1 2020-03-20      -0.0726

This shows that the max daily return date does not necessarily mean that the stock is in its prime-time. In other words, a stock can have a lucky day too, after which is might drop down again. Because Apple’s best day was 2015-08-24 which, we can see at the graph, is not a period where Apple did especially well, visible when comparing it to the period around begin 2020.

This same principle implies to the bottom return day.