In this exercise you will learn to plot data using the ggplot2 package. To answer the questions below, refer to Chapter 4.3, "Categorical vs. Quantitative", of Data Visualization with R.
# Load packages
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
## ══ Need to Learn tidyquant? ═══════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(tidyverse)
## ── Attaching packages ────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ───────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks xts::first()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks xts::last()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
# Download price history for Apple and Microsoft, starting on the first
# day of 2020 (no end date is supplied).
stock_prices <- tq_get(
  c("AAPL", "MSFT"),
  get = "stock.prices",
  from = "2020-01-01"
)
# Compute daily returns per symbol from the adjusted price; tq_mutate()
# appends the result as a new column (shown below as daily.returns).
stock_returns <- stock_prices %>%
  group_by(symbol) %>%
  tq_mutate(
    select = adjusted,
    mutate_fun = periodReturn,
    period = "daily"
  )

stock_returns
## # A tibble: 74 x 9
## # Groups: symbol [2]
## symbol date open high low close volume adjusted daily.returns
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2020-01-02 296. 301. 295. 300. 33870100 300. 0
## 2 AAPL 2020-01-03 297. 301. 296. 297. 36580700 297. -0.00972
## 3 AAPL 2020-01-06 294. 300. 293. 300. 29596800 299. 0.00797
## 4 AAPL 2020-01-07 300. 301. 297. 298. 27218000 298. -0.00470
## 5 AAPL 2020-01-08 297. 304. 297. 303. 33019800 302. 0.0161
## 6 AAPL 2020-01-09 307. 310. 306. 310. 42527100 309. 0.0212
## 7 AAPL 2020-01-10 311. 313. 308. 310. 35161200 310. 0.00226
## 8 AAPL 2020-01-13 312. 317. 311. 317. 30383000 316. 0.0214
## 9 AAPL 2020-01-14 317. 318. 312. 313. 40488600 312. -0.0135
## 10 AAPL 2020-01-15 312. 316. 310. 311. 30480900 311. -0.00429
## # … with 64 more rows
# Kernel density of daily returns, one curve per stock.
# The quantitative variable (daily.returns) goes on the x-axis; the
# categorical variable (symbol) is mapped to fill so each stock gets its
# own density curve. alpha makes the overlapping curves see-through.
ggplot(stock_returns,
       aes(x = daily.returns,
           fill = symbol)) +
  geom_density(alpha = 0.4) +
  labs(title = "Daily returns distribution by stock")
# Side-by-side boxplots: one box per stock summarising its daily returns.
stock_returns %>%
  ggplot(aes(x = symbol, y = daily.returns)) +
  geom_boxplot() +
  labs(title = "Daily returns distribution by stock")
## Q3 Based on the boxplot above, which of the two stocks would you invest in? I would invest in Microsoft because its median daily return is higher than Apple's.
library(dplyr)
# Average daily return for each stock: one row per symbol.
plotdata <- summarise(
  group_by(stock_returns, symbol),
  mean_return = mean(daily.returns)
)
# Bar chart of the mean daily return per stock. geom_col() draws bars
# whose heights are the y values themselves, which is what
# geom_bar(stat = "identity") does.
ggplot(plotdata, aes(x = symbol, y = mean_return)) +
  geom_col()
# Same bar chart of mean daily returns, with the bars coloured
# cornflower blue.
plotdata %>%
  ggplot(aes(x = symbol, y = mean_return)) +
  geom_col(fill = "cornflowerblue")
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Bar chart of mean daily returns with each bar labelled by its value.
# The label is mapped inside aes() so every bar shows its own mean_return
# (formatted as a percentage) rather than one fixed string on all bars.
# vjust nudges the label just outside the end of the bar: above it for
# non-negative means, below it for negative ones.
ggplot(plotdata,
       aes(x = symbol,
           y = mean_return)) +
  geom_bar(stat = "identity",
           fill = "cornflowerblue") +
  geom_text(aes(label = scales::percent(mean_return, accuracy = 0.01),
                vjust = ifelse(mean_return >= 0, -0.25, 1.25)))
Hint: Use the `message`, `echo`, and `results` chunk options. Refer to the R Markdown Reference Guide.