Shows my whole dataset
# excel file
data <- read_excel("myData_charts.xlsx")
str(data)
## tibble [45,090 × 10] (S3: tbl_df/tbl/data.frame)
## $ stock_symbol: chr [1:45090] "AAPL" "AAPL" "AAPL" "AAPL" ...
## $ date : POSIXct[1:45090], format: "2010-01-04" "2010-01-05" ...
## $ open : num [1:45090] 7.62 7.66 7.66 7.56 7.51 ...
## $ high : num [1:45090] 7.66 7.7 7.69 7.57 7.57 ...
## $ low : num [1:45090] 7.58 7.62 7.53 7.47 7.47 ...
## $ close : num [1:45090] 7.64 7.66 7.53 7.52 7.57 ...
## $ adj_close : num [1:45090] 6.52 6.53 6.42 6.41 6.45 ...
## $ volume : num [1:45090] 4.94e+08 6.02e+08 5.52e+08 4.77e+08 4.48e+08 ...
## $ Column1 : logi [1:45090] NA NA NA NA NA NA ...
## $ HPR : num [1:45090] 0.00272 -0.00103 -0.01591 -0.00553 0.00799 ...
Shows only the rows from the Apple (AAPL) stock information
filtered_data <- data %>%
filter(stock_symbol == "AAPL")
filtered_data
## # A tibble: 3,271 × 10
## stock_symbol date open high low close adj_close volume
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2010-01-04 00:00:00 7.62 7.66 7.58 7.64 6.52 493729600
## 2 AAPL 2010-01-05 00:00:00 7.66 7.70 7.62 7.66 6.53 601904800
## 3 AAPL 2010-01-06 00:00:00 7.66 7.69 7.53 7.53 6.42 552160000
## 4 AAPL 2010-01-07 00:00:00 7.56 7.57 7.47 7.52 6.41 477131200
## 5 AAPL 2010-01-08 00:00:00 7.51 7.57 7.47 7.57 6.45 447610800
## 6 AAPL 2010-01-11 00:00:00 7.6 7.61 7.44 7.50 6.40 462229600
## 7 AAPL 2010-01-12 00:00:00 7.47 7.49 7.37 7.42 6.32 594459600
## 8 AAPL 2010-01-13 00:00:00 7.42 7.53 7.29 7.52 6.41 605892000
## 9 AAPL 2010-01-14 00:00:00 7.50 7.52 7.46 7.48 6.38 432894000
## 10 AAPL 2010-01-15 00:00:00 7.53 7.56 7.35 7.35 6.27 594067600
## # ℹ 3,261 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>
Shows from highest to lowest the HPR (Holding Period Return) of Apply stock
arranged_data <- filtered_data %>%
arrange(desc(HPR))
arranged_data
## # A tibble: 3,271 × 10
## stock_symbol date open high low close adj_close volume
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2015-08-24 00:00:00 23.7 27.2 23 25.8 23.5 648825200
## 2 AAPL 2022-02-24 00:00:00 153. 163. 152 163. 162. 141147500
## 3 AAPL 2020-02-28 00:00:00 64.3 69.6 64.1 68.3 67.1 426510000
## 4 AAPL 2018-12-26 00:00:00 37.1 39.3 36.7 39.3 37.9 234330000
## 5 AAPL 2022-10-13 00:00:00 135. 144. 134. 143. 143. 113224000
## 6 AAPL 2020-03-02 00:00:00 70.6 75.4 69.4 74.7 73.4 341397200
## 7 AAPL 2012-04-17 00:00:00 20.7 21.8 20.4 21.8 18.6 1025528000
## 8 AAPL 2020-09-21 00:00:00 105. 110. 103. 110. 109. 195713800
## 9 AAPL 2018-02-06 00:00:00 38.7 40.9 38.5 40.8 38.8 272975200
## 10 AAPL 2022-10-28 00:00:00 148. 158. 148. 156. 155. 164762400
## # ℹ 3,261 more rows
## # ℹ 2 more variables: Column1 <lgl>, HPR <dbl>
Only shows apple stock’s closing value and date of that closing value
selected_data <- filtered_data %>%
select(stock_symbol, date, close)
selected_data
## # A tibble: 3,271 × 3
## stock_symbol date close
## <chr> <dttm> <dbl>
## 1 AAPL 2010-01-04 00:00:00 7.64
## 2 AAPL 2010-01-05 00:00:00 7.66
## 3 AAPL 2010-01-06 00:00:00 7.53
## 4 AAPL 2010-01-07 00:00:00 7.52
## 5 AAPL 2010-01-08 00:00:00 7.57
## 6 AAPL 2010-01-11 00:00:00 7.50
## 7 AAPL 2010-01-12 00:00:00 7.42
## 8 AAPL 2010-01-13 00:00:00 7.52
## 9 AAPL 2010-01-14 00:00:00 7.48
## 10 AAPL 2010-01-15 00:00:00 7.35
## # ℹ 3,261 more rows
Adds a new column to my whole dataset, daily_return, which is a percentage of the daily return
new_data <- data %>%
mutate(daily_return = (close - open) / open)
new_data
## # A tibble: 45,090 × 11
## stock_symbol date open high low close adj_close volume
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2010-01-04 00:00:00 7.62 7.66 7.58 7.64 6.52 493729600
## 2 AAPL 2010-01-05 00:00:00 7.66 7.70 7.62 7.66 6.53 601904800
## 3 AAPL 2010-01-06 00:00:00 7.66 7.69 7.53 7.53 6.42 552160000
## 4 AAPL 2010-01-07 00:00:00 7.56 7.57 7.47 7.52 6.41 477131200
## 5 AAPL 2010-01-08 00:00:00 7.51 7.57 7.47 7.57 6.45 447610800
## 6 AAPL 2010-01-11 00:00:00 7.6 7.61 7.44 7.50 6.40 462229600
## 7 AAPL 2010-01-12 00:00:00 7.47 7.49 7.37 7.42 6.32 594459600
## 8 AAPL 2010-01-13 00:00:00 7.42 7.53 7.29 7.52 6.41 605892000
## 9 AAPL 2010-01-14 00:00:00 7.50 7.52 7.46 7.48 6.38 432894000
## 10 AAPL 2010-01-15 00:00:00 7.53 7.56 7.35 7.35 6.27 594067600
## # ℹ 45,080 more rows
## # ℹ 3 more variables: Column1 <lgl>, HPR <dbl>, daily_return <dbl>
Shows the average daily return of all 14 stocks based on the previously calculated daily return column
summary_data <- new_data %>%
group_by(stock_symbol) %>%
summarise(avg_daily_return = mean(daily_return, na.rm = TRUE))
summary_data
## # A tibble: 15 × 2
## stock_symbol avg_daily_return
## <chr> <dbl>
## 1 AAPL 0.000270
## 2 ADBE 0.000549
## 3 AMZN 0.000103
## 4 CRM 0.000317
## 5 CSCO 0.000308
## 6 GOOGL -0.0000358
## 7 IBM 0.000364
## 8 INTC 0.000415
## 9 META 0.000163
## 10 MSFT 0.000390
## 11 NFLX 0.000899
## 12 NVDA 0.000375
## 13 ORCL 0.000545
## 14 TSLA 0.00000684
## 15 <NA> NaN
Order the stocks from highest to lowest based on their daily average return
ordered_data <- summary_data %>%
arrange(desc(avg_daily_return))
ordered_data
## # A tibble: 15 × 2
## stock_symbol avg_daily_return
## <chr> <dbl>
## 1 NFLX 0.000899
## 2 ADBE 0.000549
## 3 ORCL 0.000545
## 4 INTC 0.000415
## 5 MSFT 0.000390
## 6 NVDA 0.000375
## 7 IBM 0.000364
## 8 CRM 0.000317
## 9 CSCO 0.000308
## 10 AAPL 0.000270
## 11 META 0.000163
## 12 AMZN 0.000103
## 13 TSLA 0.00000684
## 14 GOOGL -0.0000358
## 15 <NA> NaN
data$date <- as.Date(data$date)
apple_data <- data %>%
filter(stock_symbol == "AAPL")
summary_stats <- apple_data %>%
summarise(mean_open = mean(open),
mean_close = mean(close),
mean_volume = mean(volume),
min_date = min(date),
max_date = max(date)
)
summary_stats
## # A tibble: 1 × 5
## mean_open mean_close mean_volume min_date max_date
## <dbl> <dbl> <dbl> <date> <date>
## 1 51.3 51.3 256325536. 2010-01-04 2022-12-29
ggplot(apple_data, aes(x = date, y = close)) +
geom_line(color = "blue") +
labs(title = "Apple (AAPL) Stock Closing Prices",
x = "Date",
y = "Closing Price") +
theme_minimal()
ggplot(apple_data, aes(x = (close-open) / open)) +
geom_histogram(binwidth = 0.02, fill = "skyblue", color = "black") +
labs(title = "Distribution of Daily Returns for Apple (AAPL) Stock",
x = "Daily Return",
y = "Frequency") +
theme_minimal()
apple_data <- apple_data %>%
mutate(daily_return = (close - open) / open)
top_return_day <- apple_data %>%
filter(daily_return == max(daily_return)) %>%
select(date, daily_return)
bottom_return_day <- apple_data %>%
filter(daily_return == min(daily_return)) %>%
select(date, daily_return)
top_return_day
## # A tibble: 1 × 2
## date daily_return
## <date> <dbl>
## 1 2015-08-24 0.0870
bottom_return_day
## # A tibble: 1 × 2
## date daily_return
## <date> <dbl>
## 1 2020-03-20 -0.0726
This shows that the max daily return date does not necessarily mean that the stock is in its prime-time. In other words, a stock can have a lucky day too, after which is might drop down again. Because Apple’s best day was 2015-08-24 which, we can see at the graph, is not a period where Apple did especially well, visible when comparing it to the period around begin 2020.
This same principle implies to the bottom return day.