library(tidyverse)
library(readxl)
library(magrittr)
library(GGally)
stocks <- read_excel("G:/My Drive/homework/Tyler G/Stocks.xlsx")

stocks %<>% mutate(Book_Value = `Book Value`,
                  week_52_low = `52 week low`,
                  week_52_high = `52 week high`,
                  Market_Cap = `Market Cap`,
                  .keep = "unused",
                  .after = EPS) %>%
  mutate(across(.cols = c(Price:week_52_high), as.double))

stocks %>% glimpse()
## Rows: 345
## Columns: 12
## $ Name         <chr> "3M Company", "Abbott Laboratories", "Accenture plc", "Ac~
## $ Symbol       <chr> "MMM", "ABT", "ACN", "ATVI", "AYI", "AAP", "AET", "AMG", ~
## $ Sector       <chr> "Industrials", "Health Care", "Information Technology", "~
## $ Price        <dbl> 189.09, 45.00, 124.14, 48.06, 205.41, 151.99, 130.59, 166~
## $ Dividend     <dbl> 2.48, 2.34, 1.96, 0.64, 0.25, 0.15, 1.53, 0.47, 2.38, 1.0~
## $ PE           <dbl> 23.17, 48.03, 18.37, 37.55, 29.68, 24.51, 20.37, 19.44, 1~
## $ EPS          <dbl> 8.16, 0.94, 6.76, 1.28, 6.92, 6.20, 6.41, 8.57, 6.42, 1.5~
## $ Book_Value   <dbl> 17.26, 13.94, 11.95, 12.23, 39.50, 39.66, 50.84, 63.84, 5~
## $ week_52_low  <dbl> 158.28, 36.76, 102.10, 30.37, 193.06, 132.98, 104.59, 130~
## $ week_52_high <dbl> 190.54, 45.83, 125.72, 48.36, 280.89, 177.83, 136.50, 179~
## $ Market_Cap   <dbl> 112.74, 77.76, 77.29, 36.13, 9.00, 11.18, 45.93, 9.44, 28~
## $ EBITDA       <dbl> 8.70000, 4.59000, 5.66000, 2.14000, 0.58620, 1.12000, 6.2~
stocks %>% mutate(across(.cols = where(is.character), factor)) %>% summary()
##                   Name         Symbol                       Sector  
##  3M Company         :  1   A      :  1   Financials            :60  
##  Abbott Laboratories:  1   AAL    :  1   Consumer Discretionary:55  
##  Accenture plc      :  1   AAP    :  1   Industrials           :52  
##  Activision Blizzard:  1   AAPL   :  1   Information Technology:39  
##  Acuity Brands Inc  :  1   ABC    :  1   Health Care           :35  
##  Advance Auto Parts :  1   ABT    :  1   Real Estate           :27  
##  (Other)            :339   (Other):339   (Other)               :77  
##      Price           Dividend           PE              EPS        
##  Min.   : 12.46   Min.   :0.000   Min.   :  5.36   Min.   : 0.250  
##  1st Qu.: 48.23   1st Qu.:1.395   1st Qu.: 16.97   1st Qu.: 1.940  
##  Median : 71.75   Median :2.010   Median : 22.07   Median : 3.265  
##  Mean   : 84.97   Mean   :2.181   Mean   : 25.86   Mean   : 4.040  
##  3rd Qu.:108.60   3rd Qu.:2.913   3rd Qu.: 28.84   3rd Qu.: 5.407  
##  Max.   :385.35   Max.   :9.200   Max.   :209.05   Max.   :38.180  
##  NA's   :1        NA's   :1       NA's   :1        NA's   :1       
##    Book_Value      week_52_low      week_52_high      Market_Cap    
##  Min.   :  0.47   Min.   :  7.53   Min.   : 14.22   Min.   :  4.18  
##  1st Qu.: 11.93   1st Qu.: 36.13   1st Qu.: 52.05   1st Qu.: 12.27  
##  Median : 20.90   Median : 57.05   Median : 79.24   Median : 22.31  
##  Mean   : 27.31   Mean   : 66.40   Mean   : 91.72   Mean   : 45.16  
##  3rd Qu.: 35.26   3rd Qu.: 84.98   3rd Qu.:114.47   3rd Qu.: 44.57  
##  Max.   :212.79   Max.   :317.60   Max.   :399.46   Max.   :732.00  
##                   NA's   :2        NA's   :1                        
##      EBITDA       
##  Min.   : 0.0000  
##  1st Qu.: 0.9826  
##  Median : 1.8800  
##  Mean   : 3.9818  
##  3rd Qu.: 4.0500  
##  Max.   :69.7500  
## 
stocks %>%
  select(Sector:EBITDA) %>%
  mutate(across(.cols = Sector, factor)) %>%
  ggpairs(axisLabels = "none")

1. Data Cleaning

Part (a)

stocks %>% distinct(Name) %>% nrow()
## [1] 345

Part (b)

stocks %>%
  gather() %>% # pivot_longer error
  group_by(key) %>%
  summarize(NAs = sum(is.na(value)))
## # A tibble: 12 x 2
##    key            NAs
##    <chr>        <int>
##  1 Book_Value       0
##  2 Dividend         1
##  3 EBITDA           0
##  4 EPS              1
##  5 Market_Cap       0
##  6 Name             0
##  7 PE               1
##  8 Price            1
##  9 Sector           0
## 10 Symbol           0
## 11 week_52_high     1
## 12 week_52_low      2
stocks %>% complete.cases() %>% sum()
## [1] 339

Part (c)

stocks %<>% drop_na() 

Part (d) (e) (f)

stocks %<>% mutate(logBookValue = log10(Book_Value), .after = Book_Value)
stocks %<>% mutate(logMarketCap = log10(Market_Cap), .after = Market_Cap)
stocks %<>% mutate(logEBITDA = log10(EBITDA), .after = EBITDA)

Part (g)

stocks %<>% mutate(PE_ratio = if_else(PE > 15, 1, 0), .after = PE)

Part (h)

stocks$Name[3] <- "Bob the Builder"
stocks %>% select(Name) %>% slice_head(n = 5)
## # A tibble: 5 x 1
##   Name               
##   <chr>              
## 1 3M Company         
## 2 Abbott Laboratories
## 3 Bob the Builder    
## 4 Activision Blizzard
## 5 Acuity Brands Inc

Part (i)

stocks %>% write_csv("G:/My Drive/homework/Tyler G/stocks.csv")

2. Data Inspection

Part (a) (b)

stocks %>% 
  group_by(Sector) %>%
  summarize(Total_Companies = NROW(Name),
            Stock_Avg = mean(Price),
            Stock_Std.Dev = sd(Price))
## # A tibble: 11 x 4
##    Sector                     Total_Companies Stock_Avg Stock_Std.Dev
##    <chr>                                <int>     <dbl>         <dbl>
##  1 Consumer Discretionary                  54      69.9          37.2
##  2 Consumer Staples                        23      82.9          33.2
##  3 Energy                                   6      59.0          22.0
##  4 Financials                              58      78.4          60.5
##  5 Health Care                             35     107.           55.6
##  6 Industrials                             52     105.           59.8
##  7 Information Technology                  39      74.5          46.8
##  8 Materials                               20     104.           63.9
##  9 Real Estate                             27     100.           81.4
## 10 Telecommunication Services               3      37.9          13.9
## 11 Utilities                               22      63.0          27.3

Part (c)

cor(stocks$Book_Value, stocks$Market_Cap)
## [1] 0.02055178

Part (d)

stocks %>% slice_max(Dividend, n = 5) %>% select(Name, Dividend)
## # A tibble: 5 x 2
##   Name                       Dividend
##   <chr>                         <dbl>
## 1 CenturyLink Inc                9.2 
## 2 Iron Mountain Incorporated     6.05
## 3 Mattel Inc.                    5.97
## 4 Seagate Technology             5.15
## 5 Welltower Inc.                 5

Part (e)

stocks %>% slice_min(EPS, n = 5) %>% select(Name, EPS)
## # A tibble: 5 x 2
##   Name                         EPS
##   <chr>                      <dbl>
## 1 Kinder Morgan               0.25
## 2 Leucadia National Corp.     0.34
## 3 Microchip Technology        0.41
## 4 Iron Mountain Incorporated  0.42
## 5 Western Union Co            0.51

Part (f)

stocks %>% filter(Book_Value > Market_Cap) %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1   169

Part (g)

stocks %>%
  select(Price, week_52_low, week_52_high) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(value)) +
  geom_histogram() +
  facet_wrap(vars(name))

stocks %>%
  select(Price, week_52_low, week_52_high) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(value)) +
  geom_density() +
  facet_wrap(vars(name))

Part (h)

stocks %>%
  ggplot(aes(Book_Value, Market_Cap)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, col = "red")
## `geom_smooth()` using formula 'y ~ x'

Part (i)

stocks %>%
  ggplot(aes(Price, group = PE_ratio, fill = factor(PE_ratio))) +
  geom_boxplot(varwidth = TRUE) +
  scale_fill_discrete(labels = c("Ratio > 15", "Ratio <= 15"), name = "") +
  ggtitle("Price to Earnings Ratio") +
  theme(axis.text.y = element_blank())