library(tidyverse)
library(readxl)
library(magrittr)
library(GGally)
stocks <- read_excel("G:/My Drive/homework/Tyler G/Stocks.xlsx")
stocks %<>% mutate(Book_Value = `Book Value`,
week_52_low = `52 week low`,
week_52_high = `52 week high`,
Market_Cap = `Market Cap`,
.keep = "unused",
.after = EPS) %>%
mutate(across(.cols = c(Price:week_52_high), as.double))
stocks %>% glimpse()
## Rows: 345
## Columns: 12
## $ Name <chr> "3M Company", "Abbott Laboratories", "Accenture plc", "Ac~
## $ Symbol <chr> "MMM", "ABT", "ACN", "ATVI", "AYI", "AAP", "AET", "AMG", ~
## $ Sector <chr> "Industrials", "Health Care", "Information Technology", "~
## $ Price <dbl> 189.09, 45.00, 124.14, 48.06, 205.41, 151.99, 130.59, 166~
## $ Dividend <dbl> 2.48, 2.34, 1.96, 0.64, 0.25, 0.15, 1.53, 0.47, 2.38, 1.0~
## $ PE <dbl> 23.17, 48.03, 18.37, 37.55, 29.68, 24.51, 20.37, 19.44, 1~
## $ EPS <dbl> 8.16, 0.94, 6.76, 1.28, 6.92, 6.20, 6.41, 8.57, 6.42, 1.5~
## $ Book_Value <dbl> 17.26, 13.94, 11.95, 12.23, 39.50, 39.66, 50.84, 63.84, 5~
## $ week_52_low <dbl> 158.28, 36.76, 102.10, 30.37, 193.06, 132.98, 104.59, 130~
## $ week_52_high <dbl> 190.54, 45.83, 125.72, 48.36, 280.89, 177.83, 136.50, 179~
## $ Market_Cap <dbl> 112.74, 77.76, 77.29, 36.13, 9.00, 11.18, 45.93, 9.44, 28~
## $ EBITDA <dbl> 8.70000, 4.59000, 5.66000, 2.14000, 0.58620, 1.12000, 6.2~
stocks %>% mutate(across(.cols = where(is.character), factor)) %>% summary()
## Name Symbol Sector
## 3M Company : 1 A : 1 Financials :60
## Abbott Laboratories: 1 AAL : 1 Consumer Discretionary:55
## Accenture plc : 1 AAP : 1 Industrials :52
## Activision Blizzard: 1 AAPL : 1 Information Technology:39
## Acuity Brands Inc : 1 ABC : 1 Health Care :35
## Advance Auto Parts : 1 ABT : 1 Real Estate :27
## (Other) :339 (Other):339 (Other) :77
## Price Dividend PE EPS
## Min. : 12.46 Min. :0.000 Min. : 5.36 Min. : 0.250
## 1st Qu.: 48.23 1st Qu.:1.395 1st Qu.: 16.97 1st Qu.: 1.940
## Median : 71.75 Median :2.010 Median : 22.07 Median : 3.265
## Mean : 84.97 Mean :2.181 Mean : 25.86 Mean : 4.040
## 3rd Qu.:108.60 3rd Qu.:2.913 3rd Qu.: 28.84 3rd Qu.: 5.407
## Max. :385.35 Max. :9.200 Max. :209.05 Max. :38.180
## NA's :1 NA's :1 NA's :1 NA's :1
## Book_Value week_52_low week_52_high Market_Cap
## Min. : 0.47 Min. : 7.53 Min. : 14.22 Min. : 4.18
## 1st Qu.: 11.93 1st Qu.: 36.13 1st Qu.: 52.05 1st Qu.: 12.27
## Median : 20.90 Median : 57.05 Median : 79.24 Median : 22.31
## Mean : 27.31 Mean : 66.40 Mean : 91.72 Mean : 45.16
## 3rd Qu.: 35.26 3rd Qu.: 84.98 3rd Qu.:114.47 3rd Qu.: 44.57
## Max. :212.79 Max. :317.60 Max. :399.46 Max. :732.00
## NA's :2 NA's :1
## EBITDA
## Min. : 0.0000
## 1st Qu.: 0.9826
## Median : 1.8800
## Mean : 3.9818
## 3rd Qu.: 4.0500
## Max. :69.7500
##
stocks %>%
select(Sector:EBITDA) %>%
mutate(across(.cols = Sector, factor)) %>%
ggpairs(axisLabels = "none")

1. Data Cleaning
Part (a)
stocks %>% distinct(Name) %>% nrow()
## [1] 345
Part (b)
stocks %>%
gather() %>% # pivot_longer error
group_by(key) %>%
summarize(NAs = sum(is.na(value)))
## # A tibble: 12 x 2
## key NAs
## <chr> <int>
## 1 Book_Value 0
## 2 Dividend 1
## 3 EBITDA 0
## 4 EPS 1
## 5 Market_Cap 0
## 6 Name 0
## 7 PE 1
## 8 Price 1
## 9 Sector 0
## 10 Symbol 0
## 11 week_52_high 1
## 12 week_52_low 2
stocks %>% complete.cases() %>% sum()
## [1] 339
Part (c)
stocks %<>% drop_na()
Part (d) (e) (f)
stocks %<>% mutate(logBookValue = log10(Book_Value), .after = Book_Value)
stocks %<>% mutate(logMarketCap = log10(Market_Cap), .after = Market_Cap)
stocks %<>% mutate(logEBITDA = log10(EBITDA), .after = EBITDA)
Part (g)
stocks %<>% mutate(PE_ratio = if_else(PE > 15, 1, 0), .after = PE)
Part (h)
stocks$Name[3] <- "Bob the Builder"
stocks %>% select(Name) %>% slice_head(n = 5)
## # A tibble: 5 x 1
## Name
## <chr>
## 1 3M Company
## 2 Abbott Laboratories
## 3 Bob the Builder
## 4 Activision Blizzard
## 5 Acuity Brands Inc
Part (i)
stocks %>% write_csv("G:/My Drive/homework/Tyler G/stocks.csv")
2. Data Inspection
Part (a) (b)
stocks %>%
group_by(Sector) %>%
summarize(Total_Companies = NROW(Name),
Stock_Avg = mean(Price),
Stock_Std.Dev = sd(Price))
## # A tibble: 11 x 4
## Sector Total_Companies Stock_Avg Stock_Std.Dev
## <chr> <int> <dbl> <dbl>
## 1 Consumer Discretionary 54 69.9 37.2
## 2 Consumer Staples 23 82.9 33.2
## 3 Energy 6 59.0 22.0
## 4 Financials 58 78.4 60.5
## 5 Health Care 35 107. 55.6
## 6 Industrials 52 105. 59.8
## 7 Information Technology 39 74.5 46.8
## 8 Materials 20 104. 63.9
## 9 Real Estate 27 100. 81.4
## 10 Telecommunication Services 3 37.9 13.9
## 11 Utilities 22 63.0 27.3
Part (c)
cor(stocks$Book_Value, stocks$Market_Cap)
## [1] 0.02055178
Part (d)
stocks %>% slice_max(Dividend, n = 5) %>% select(Name, Dividend)
## # A tibble: 5 x 2
## Name Dividend
## <chr> <dbl>
## 1 CenturyLink Inc 9.2
## 2 Iron Mountain Incorporated 6.05
## 3 Mattel Inc. 5.97
## 4 Seagate Technology 5.15
## 5 Welltower Inc. 5
Part (e)
stocks %>% slice_min(EPS, n = 5) %>% select(Name, EPS)
## # A tibble: 5 x 2
## Name EPS
## <chr> <dbl>
## 1 Kinder Morgan 0.25
## 2 Leucadia National Corp. 0.34
## 3 Microchip Technology 0.41
## 4 Iron Mountain Incorporated 0.42
## 5 Western Union Co 0.51
Part (f)
stocks %>% filter(Book_Value > Market_Cap) %>% count()
## # A tibble: 1 x 1
## n
## <int>
## 1 169
Part (g)
stocks %>%
select(Price, week_52_low, week_52_high) %>%
pivot_longer(cols = everything()) %>%
ggplot(aes(value)) +
geom_histogram() +
facet_wrap(vars(name))

stocks %>%
select(Price, week_52_low, week_52_high) %>%
pivot_longer(cols = everything()) %>%
ggplot(aes(value)) +
geom_density() +
facet_wrap(vars(name))

Part (h)
stocks %>%
ggplot(aes(Book_Value, Market_Cap)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, col = "red")
## `geom_smooth()` using formula 'y ~ x'

Part (i)
stocks %>%
ggplot(aes(Price, group = PE_ratio, fill = factor(PE_ratio))) +
geom_boxplot(varwidth = TRUE) +
scale_fill_discrete(labels = c("Ratio > 15", "Ratio <= 15"), name = "") +
ggtitle("Price to Earnings Ratio") +
theme(axis.text.y = element_blank())
