# First question
library(readxl)
# Load the "Growth_Value" worksheet from the chapter 3 workbook (path is
# relative to the working directory) and preview its first six rows.
growth_value_data <- read_excel(
  path = "jaggia_ba_2025_ch03_data.xlsx",
  sheet = "Growth_Value"
)

head(growth_value_data, n = 6L)
## # A tibble: 6 × 3
##    Year Growth Value
##   <dbl>  <dbl> <dbl>
## 1  1984   -5.5 -8.59
## 2  1985   39.9 22.1 
## 3  1986   13.0 14.7 
## 4  1987   -1.7 -8.58
## 5  1988   16.0 29.0 
## 6  1989   41.6 23.0
# Central tendency of each fund: mean and median, Growth first, then Value.
mean(growth_value_data[["Growth"]])
## [1] 15.98923
median(growth_value_data[["Growth"]])
## [1] 16.05
mean(growth_value_data[["Value"]])
## [1] 11.99641
median(growth_value_data[["Value"]])
## [1] 15.09
# Five-number summary plus mean for every column in one table.
summary(object = growth_value_data)
##       Year          Growth           Value        
##  Min.   :1984   Min.   :-40.90   Min.   :-46.520  
##  1st Qu.:1994   1st Qu.:  2.13   1st Qu.:  1.195  
##  Median :2003   Median : 16.05   Median : 15.090  
##  Mean   :2003   Mean   : 15.99   Mean   : 11.996  
##  3rd Qu.:2012   3rd Qu.: 37.19   3rd Qu.: 22.605  
##  Max.   :2022   Max.   : 79.48   Max.   : 44.080
# Second question
# Dispersion measures for the Growth fund.
# 30th percentile.
with(growth_value_data, quantile(Growth, probs = 0.30))
##   30% 
## 6.738
# Range: largest return minus smallest return.
with(growth_value_data, diff(range(Growth)))
## [1] 120.38
# Interquartile range: third quartile minus first quartile
# (the result keeps the "75%" name of the upper quantile).
with(growth_value_data, diff(quantile(Growth, probs = c(0.25, 0.75))))
##    75% 
## 35.055
# Sample variance and sample standard deviation.
with(growth_value_data, var(Growth))
## [1] 657.9522
with(growth_value_data, sd(Growth))
## [1] 25.65058
# Side-by-side vertical boxplots comparing the Growth and Value return
# distributions. The takeaway sentence is placed under the x-axis via `xlab`.
# Fix: removed a stray typographic closing quote that sat inside the label
# string and was drawn on the plot.
boxplot(growth_value_data$Growth, 
        growth_value_data$Value, 
        xlab = "Growth and Value funds have similar returns, \n but Growth shows slightly higher variability.", 
        names = c("Growth", "Value"), 
        horizontal = FALSE, 
        col = c("blue", "red"))

# Identify outliers in each fund with the boxplot rule (points beyond the
# whiskers, default coefficient 1.5). boxplot.stats() returns the same `out`
# vector that boxplot() would, but without drawing an extra unlabeled plot
# each time it is called just to read that value.
outliersGrowth <- boxplot.stats(growth_value_data$Growth)$out

outliersValue <- boxplot.stats(growth_value_data$Value)$out

# Replace the Value outliers with NA so the summary below excludes them.
# NOTE: this overwrites the Value column in place, so any later code that
# reads growth_value_data$Value sees NA for those observations.
growth_value_data$Value[growth_value_data$Value %in% outliersValue] <- NA
summary(growth_value_data)
##       Year          Growth           Value        
##  Min.   :1984   Min.   :-40.90   Min.   :-17.340  
##  1st Qu.:1994   1st Qu.:  2.13   1st Qu.:  3.565  
##  Median :2003   Median : 16.05   Median : 15.380  
##  Mean   :2003   Mean   : 15.99   Mean   : 13.536  
##  3rd Qu.:2012   3rd Qu.: 37.19   3rd Qu.: 22.773  
##  Max.   :2022   Max.   : 79.48   Max.   : 44.080  
##                                  NA's   :1
# z-scores of the most extreme Growth returns: how many standard deviations
# the minimum and the maximum lie from the mean.
minzGrowth <- with(
  growth_value_data,
  (min(Growth) - mean(Growth)) / sd(Growth)
)
minzGrowth
## [1] -2.217854
maxzGrowth <- with(
  growth_value_data,
  (max(Growth) - mean(Growth)) / sd(Growth)
)
maxzGrowth
## [1] 2.475218
# Histogram of the Growth returns; the plot title states the takeaway.
hist(
  x = growth_value_data[["Growth"]],
  col = "lightgray",
  border = "white",
  main = "Growth fund returns cluster around \n the mean with a few extreme years.",
  xlab = "Growth annual Return (%)",
  ylab = "Frequency"
)

# Scatter plot of Value returns (y) against Growth returns (x), drawn with
# solid points. Observations whose Value was set to NA in the outlier step
# earlier in the script have no y-coordinate and are therefore not drawn.
plot(
  x = growth_value_data[["Growth"]],
  y = growth_value_data[["Value"]],
  pch = 16,
  col = "chocolate",
  main = "Growth and Value returns move together over time,\n suggesting a positive relationship",
  xlab = "Growth Return (%)",
  ylab = "Value Return (%)"
)