---
title: "Chapter 4 Bivariate Graphs"
author: "Daniel Lee"
date: "9/10/2019"
output:
  html_document:
    toc: TRUE
editor_options:
  chunk_output_type: console
---
In this exercise you will learn to plot data using the ggplot2 package. To answer the questions below, refer to Chapter 4.3, "Categorical vs. Quantitative", of Data Visualization with R.
# Load packages
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks xts::first()
## ✖ lubridate::intersect() masks base::intersect()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks xts::last()
## ✖ lubridate::setdiff() masks base::setdiff()
## ✖ lubridate::union() masks base::union()
library(tidyverse)
# Ticker symbols to analyze
stocks <- c("AAPL", "MSFT")

# Fetch the price history for every ticker over the sample window
# (2019-01-01 to 2019-05-31), then group the rows by ticker so that
# later per-stock calculations are kept separate.
stock_prices <- tq_get(
  stocks,
  get  = "stock.prices",
  from = "2019-01-01",
  to   = "2019-05-31"
)
stock_prices <- group_by(stock_prices, symbol)

# Print the grouped price table
stock_prices
## # A tibble: 206 x 8
## # Groups: symbol [2]
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2019-01-02 155. 159. 154. 158. 37039700 156.
## 2 AAPL 2019-01-03 144. 146. 142 142. 91312200 141.
## 3 AAPL 2019-01-04 145. 149. 144. 148. 58607100 147.
## 4 AAPL 2019-01-07 149. 149. 146. 148. 54777800 146.
## 5 AAPL 2019-01-08 150. 152. 149. 151. 41025300 149.
## 6 AAPL 2019-01-09 151. 155. 150. 153. 45099100 151.
## 7 AAPL 2019-01-10 152. 154. 151. 154. 35780700 152.
## 8 AAPL 2019-01-11 153. 154. 152. 152. 27023200 150.
## 9 AAPL 2019-01-14 151. 151. 149. 150 32439200 148.
## 10 AAPL 2019-01-15 150. 153. 150. 153. 28710900 151.
## # … with 196 more rows
# Daily returns computed from the adjusted price; tq_mutate() appends the
# result to the grouped price table as the column `daily.returns`.
stock_returns <- tq_mutate(
  stock_prices,
  select     = adjusted,
  mutate_fun = periodReturn,
  period     = "daily"
)

# Print the table with the added returns column
stock_returns
## # A tibble: 206 x 9
## # Groups: symbol [2]
## symbol date open high low close volume adjusted daily.returns
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2019-01-02 155. 159. 154. 158. 3.70e7 156. 0
## 2 AAPL 2019-01-03 144. 146. 142 142. 9.13e7 141. -0.0996
## 3 AAPL 2019-01-04 145. 149. 144. 148. 5.86e7 147. 0.0427
## 4 AAPL 2019-01-07 149. 149. 146. 148. 5.48e7 146. -0.00223
## 5 AAPL 2019-01-08 150. 152. 149. 151. 4.10e7 149. 0.0191
## 6 AAPL 2019-01-09 151. 155. 150. 153. 4.51e7 151. 0.0170
## 7 AAPL 2019-01-10 152. 154. 151. 154. 3.58e7 152. 0.00320
## 8 AAPL 2019-01-11 153. 154. 152. 152. 2.70e7 150. -0.00982
## 9 AAPL 2019-01-14 151. 151. 149. 150 3.24e7 148. -0.0150
## 10 AAPL 2019-01-15 150. 153. 150. 153. 2.87e7 151. 0.0205
## # … with 196 more rows
library(dplyr)
# Summary table with one row per stock holding its average daily return.
# NOTE: the column is called `mean_salary` although it stores a mean return;
# the name is kept because the bar-chart code refers to it by that name.
plotdata <- summarize(
  group_by(stock_returns, symbol),
  mean_salary = mean(daily.returns)
)

# Print the summary table
plotdata
## # A tibble: 2 x 2
## symbol mean_salary
## <chr> <dbl>
## 1 AAPL 0.00144
## 2 MSFT 0.00228
Hint: See the code in 4.3.1 Bar chart (on summary statistics).
# Bar chart of the summary table: one bar per ticker. The bar height is the
# precomputed mean itself, hence stat = "identity" instead of a row count.
plotdata %>%
  ggplot(aes(x = symbol, y = mean_salary)) +
  geom_bar(stat = "identity")
Hint: See the code in 4.3.1 Bar chart (on summary statistics).
# Same bar chart, with company names instead of ticker symbols on the x-axis.
# factor() applies the labels in sorted level order:
# "AAPL" -> "Apple", "MSFT" -> "Microsoft".
plotdata %>%
  ggplot(aes(
    x = factor(symbol, labels = c("Apple", "Microsoft")),
    y = mean_salary
  )) +
  geom_bar(stat = "identity")
Hint: See the code in 4.3.1 Bar chart (on summary statistics).
# Bar chart with company names on the x-axis and a uniform bar color.
# `fill` is set outside aes(), so it is a fixed color rather than a mapping.
plotdata %>%
  ggplot(aes(
    x = factor(symbol, labels = c("Apple", "Microsoft")),
    y = mean_salary
  )) +
  geom_bar(stat = "identity", fill = "cornflowerblue")
Hint: See the code in 4.3.1 Bar chart (on summary statistics).
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Labeled bar chart of the mean daily return per stock.
# - geom_text() prints each bar's value as a percentage just above the bar
#   (a negative vjust moves the text upward).
# - The y-axis is labeled in percent with ggplot2's default breaks, because
#   the plotted values are daily returns (small fractions), not dollar amounts.
# - Title and subtitle describe the plotted stock data, which covers
#   2019-01-01 to 2019-05-31.
ggplot(plotdata,
       aes(x = factor(symbol, labels = c("Apple", "Microsoft")),
           y = mean_salary)) +
  geom_bar(stat = "identity",
           fill = "cornflowerblue") +
  geom_text(aes(label = percent(mean_salary)),
            vjust = -0.25) +
  scale_y_continuous(labels = percent) +
  labs(title = "Mean Daily Returns by Stock",
       subtitle = "Apple and Microsoft, January to May 2019",
       x = "",
       y = "")
Hint: See the code in 4.3.1 Bar chart (on summary statistics).
library(scales)
# Labeled bar chart of the mean daily return per stock, with a title and
# axis titles.
# - geom_text() prints each bar's value as a percentage just above the bar.
# - The y-axis is labeled in percent with ggplot2's default breaks, because
#   the plotted values are daily returns (small fractions), not dollar amounts.
ggplot(plotdata,
       aes(x = factor(symbol, labels = c("Apple", "Microsoft")),
           y = mean_salary)) +
  geom_bar(stat = "identity",
           fill = "cornflowerblue") +
  geom_text(aes(label = percent(mean_salary)),
            vjust = -0.25) +
  scale_y_continuous(labels = percent) +
  labs(title = "Mean Daily returns of apple and Microsoft",
       x = "stocks",
       y = "Mean Daily Returns")
Hint: See the code in 4.3.1 Bar chart (on summary statistics).
library(scales)
# Final version of the bar chart: mean daily return per stock with value
# labels, a title, and axis titles.
# - geom_text() prints each bar's value as a percentage just above the bar.
# - The y-axis is labeled in percent with ggplot2's default breaks, because
#   the plotted values are daily returns (small fractions), not dollar amounts.
ggplot(plotdata,
       aes(x = factor(symbol, labels = c("Apple", "Microsoft")),
           y = mean_salary)) +
  geom_bar(stat = "identity",
           fill = "cornflowerblue") +
  geom_text(aes(label = percent(mean_salary)),
            vjust = -0.25) +
  scale_y_continuous(labels = percent) +
  labs(title = "Mean Daily returns of apple and Microsoft",
       x = "stocks",
       y = "Mean Daily Returns")
# Grouped kernel density plot: one density curve of daily returns per stock.
# The curves are drawn semi-transparent (alpha = 0.4) so that overlapping
# regions of the two stocks stay visible.
ggplot(stock_returns,
       aes(x = daily.returns,
           fill = symbol)) +
  geom_density(alpha = 0.4) +
  labs(title = "Distribution of daily returns by stock")
Hint: See the code in 4.3.2 Grouped kernel density plots.
# Side-by-side box plots: one box per stock summarizing its daily returns.
ggplot(stock_returns,
       aes(x = symbol,
           y = daily.returns)) +
  geom_boxplot() +
  labs(title = "Distribution of daily returns by stock")
Hint: See the code in 4.3.3 Box plots.
Hint: Use message, echo and results in the chunk options. Refer to the RMarkdown Reference Guide.