Hypothesis Testing

library(readxl)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lmtest)

## Warning: package 'lmtest' was built under R version 4.3.3

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.3.2

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(sandwich)

## Warning: package 'sandwich' was built under R version 4.3.3

# Load the Disney spreadsheet (path is relative to the working directory)
disney_data <- read_xlsx(path = "Disney-Data.xlsx")

ANALYZING THE DISNEY DATA

# Fit the linear model of `disney` on the mktrf, smb and hml factors
model_disney <- lm(formula = disney ~ mktrf + smb + hml, data = disney_data)

# Print the coefficient table and fit statistics for the Disney model
print(summary(model_disney))

## 
## Call:
## lm(formula = disney ~ mktrf + smb + hml, data = disney_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.289 -23.937 -13.185   5.799 114.757 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  35.0669     1.7761  19.744   <2e-16 ***
## mktrf         0.3746     0.4038   0.928   0.3540    
## smb          -0.5814     0.6028  -0.964   0.3354    
## hml          -1.5122     0.6083  -2.486   0.0133 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36.31 on 427 degrees of freedom
## Multiple R-squared:  0.01785,    Adjusted R-squared:  0.01095 
## F-statistic: 2.587 on 3 and 427 DF,  p-value: 0.05265

# Hypothesis Testing for Disney
# Heteroskedasticity-robust (HC1) t tests of H0: coefficient = 0 for every
# coefficient in the model; the "(Intercept)" row is the test that the
# intercept is equal to zero.
# The covariance argument is written as `vcov.` (coeftest's formal name)
# instead of relying on partial matching of `vcov`.
intercept_test_disney <- coeftest(
  model_disney,
  vcov. = vcovHC(model_disney, type = "HC1")
)
intercept_test_disney

## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 35.06693    1.74621 20.0818   <2e-16 ***
## mktrf        0.37462    0.41295  0.9072   0.3648    
## smb         -0.58136    0.53467 -1.0873   0.2775    
## hml         -1.51215    0.65120 -2.3221   0.0207 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Test the hypothesis that the beta coefficient for `mktrf` is equal to one,
# using the HC1 robust covariance matrix.
# `vcov.` is the formal argument name; `vcov` only worked via partial matching.
linearHypothesis(
  model_disney,
  "mktrf = 1",
  vcov. = vcovHC(model_disney, type = "HC1")
)

## Linear hypothesis test
## 
## Hypothesis:
## mktrf = 1
## 
## Model 1: restricted model
## Model 2: disney ~ mktrf + smb + hml
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F Pr(>F)
## 1    428                 
## 2    427  1 2.2934 0.1307

# Test the joint hypothesis that the intercept is equal to zero and the beta
# coefficient for `mktrf` is equal to one (robust F test, HC1 covariance).
# `vcov.` is the formal argument name; `vcov` only worked via partial matching.
linearHypothesis(
  model_disney,
  c("(Intercept) = 0", "mktrf = 1"),
  vcov. = vcovHC(model_disney, type = "HC1")
)

## Linear hypothesis test
## 
## Hypothesis:
## (Intercept) = 0
## mktrf = 1
## 
## Model 1: restricted model
## Model 2: disney ~ mktrf + smb + hml
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F    Pr(>F)    
## 1    429                        
## 2    427  2 201.88 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Perform Diagnostic Testing for Disney
# Breusch-Pagan test for heteroscedasticity (H0: constant error variance).
# Called with its defaults, bptest() reports the studentized form of the test.
bptest(model_disney)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_disney
## BP = 10.014, df = 3, p-value = 0.01845

# Durbin-Watson test for autocorrelation in the model residuals.
# As called here, the alternative hypothesis is positive autocorrelation
# ("true autocorrelation is greater than 0").
dwtest(model_disney)

## 
##  Durbin-Watson test
## 
## data:  model_disney
## DW = 0.040434, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# Shapiro-Wilk test for normality of the model residuals
# (H0: the residuals are normally distributed)
shapiro.test(residuals(model_disney))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_disney)
## W = 0.80125, p-value < 2.2e-16

# Diagnostic plots for Disney. NOTE: plot() on an lm object draws the full set
# of default diagnostic plots (residuals vs fitted, normal Q-Q, scale-location,
# residuals vs leverage), not only residuals vs fitted; pass `which = 1` to
# restrict it to the residuals-vs-fitted panel.
plot(model_disney)

# Q-Q plot for residuals for Disney: raw residuals against normal quantiles,
# with a reference line added by qqline()
qqnorm(residuals(model_disney))
qqline(residuals(model_disney))

ANALYZING THE GENERAL ELECTRIC DATA

# Load the General Electric spreadsheet (path is relative to the working
# directory)
general_electric_data <- read_xlsx(path = "General-Electric-Data.xlsx")

# Fit the linear model of `ge` on the mktrf, smb and hml factors
model_ge <- lm(formula = ge ~ mktrf + smb + hml, data = general_electric_data)

# Print the coefficient table and fit statistics for the GE model
print(summary(model_ge))

## 
## Call:
## lm(formula = ge ~ mktrf + smb + hml, data = general_electric_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -105.693  -70.850    0.465   60.507  154.886 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 102.3784     3.3449  30.607   <2e-16 ***
## mktrf        -0.5049     0.7604  -0.664    0.507    
## smb           1.4257     1.1353   1.256    0.210    
## hml           1.4821     1.1455   1.294    0.196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68.37 on 427 degrees of freedom
## Multiple R-squared:  0.007164,   Adjusted R-squared:  0.0001882 
## F-statistic: 1.027 on 3 and 427 DF,  p-value: 0.3804

# Hypothesis Testing for GE
# Heteroskedasticity-robust (HC1) t tests of H0: coefficient = 0 for every
# coefficient in the model; the "(Intercept)" row is the test that the
# intercept is equal to zero.
# The covariance argument is written as `vcov.` (coeftest's formal name)
# instead of relying on partial matching of `vcov`.
intercept_test_ge <- coeftest(
  model_ge,
  vcov. = vcovHC(model_ge, type = "HC1")
)
intercept_test_ge

## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 102.37841    3.32382 30.8014   <2e-16 ***
## mktrf        -0.50492    0.70319 -0.7180   0.4731    
## smb           1.42567    1.27180  1.1210   0.2629    
## hml           1.48213    1.16580  1.2713   0.2043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Test the hypothesis that the beta coefficient for `mktrf` is equal to one,
# using the HC1 robust covariance matrix.
# `vcov.` is the formal argument name; `vcov` only worked via partial matching.
linearHypothesis(
  model_ge,
  "mktrf = 1",
  vcov. = vcovHC(model_ge, type = "HC1")
)

## Linear hypothesis test
## 
## Hypothesis:
## mktrf = 1
## 
## Model 1: restricted model
## Model 2: ge ~ mktrf + smb + hml
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F  Pr(>F)  
## 1    428                    
## 2    427  1 4.5801 0.03291 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Test the joint hypothesis that the intercept is equal to zero and the beta
# coefficient for `mktrf` is equal to one (robust F test, HC1 covariance).
# `vcov.` is the formal argument name; `vcov` only worked via partial matching.
linearHypothesis(
  model_ge,
  c("(Intercept) = 0", "mktrf = 1"),
  vcov. = vcovHC(model_ge, type = "HC1")
)

## Linear hypothesis test
## 
## Hypothesis:
## (Intercept) = 0
## mktrf = 1
## 
## Model 1: restricted model
## Model 2: ge ~ mktrf + smb + hml
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F    Pr(>F)    
## 1    429                        
## 2    427  2 476.95 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Perform Diagnostic Testing for GE
# Breusch-Pagan test for heteroscedasticity (H0: constant error variance).
# Called with its defaults, bptest() reports the studentized form of the test.
bptest(model_ge)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_ge
## BP = 4.7847, df = 3, p-value = 0.1883

# Durbin-Watson test for autocorrelation in the model residuals.
# As called here, the alternative hypothesis is positive autocorrelation
# ("true autocorrelation is greater than 0").
dwtest(model_ge)

## 
##  Durbin-Watson test
## 
## data:  model_ge
## DW = 0.034283, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# Shapiro-Wilk test for normality of the model residuals
# (H0: the residuals are normally distributed)
shapiro.test(residuals(model_ge))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_ge)
## W = 0.93596, p-value = 1.179e-12

# Diagnostic plots for GE. NOTE: plot() on an lm object draws the full set of
# default diagnostic plots (residuals vs fitted, normal Q-Q, scale-location,
# residuals vs leverage), not only residuals vs fitted; pass `which = 1` to
# restrict it to the residuals-vs-fitted panel.
plot(model_ge)

# Q-Q plot for residuals for GE: raw residuals against normal quantiles,
# with a reference line added by qqline()
qqnorm(residuals(model_ge))
qqline(residuals(model_ge))

##Analyzing both Disney and General Electric stocks as potential investments

##Disney: Intercept: The intercept is highly significant (p < 0.001), indicating that there is a significant average return beyond just the market, size, and value factors. Beta Coefficient for mktrf: The coefficient for mktrf is not statistically different from zero (p = 0.3648), suggesting that Disney’s stock returns are not significantly related to market returns; the separate test of mktrf = 1 also fails to reject (p = 0.1307), so the market beta is imprecisely estimated. Joint Hypothesis Test: The joint null hypothesis that the intercept is equal to zero and the mktrf coefficient is equal to one is strongly rejected (p < 0.001), meaning that at least one of these two restrictions does not hold. Diagnostic Testing: There is evidence of heteroscedasticity (BP test p = 0.01845) and autocorrelation (DW test p < 2.2e-16), and the residuals are not normally distributed (Shapiro-Wilk test p < 2.2e-16).

##General Electric: Intercept: The intercept is highly significant (p < 0.001), indicating that there is a significant average return beyond just the market, size, and value factors. Beta Coefficient for mktrf: The coefficient for mktrf is not statistically different from zero (p = 0.4731), suggesting that GE’s stock returns are not significantly related to market returns; the hypothesis that the mktrf coefficient is equal to one is rejected at the 5% level (p = 0.03291). Joint Hypothesis Test: The joint null hypothesis that the intercept is equal to zero and the mktrf coefficient is equal to one is strongly rejected (p < 0.001), meaning that at least one of these two restrictions does not hold. Diagnostic Testing: There is no significant evidence of heteroscedasticity (BP test p = 0.1883), but there is evidence of autocorrelation (DW test p < 2.2e-16), and the residuals are not normally distributed (Shapiro-Wilk test p = 1.179e-12).

##Conclusion: Neither Disney nor General Electric shows significant dependence on market returns (mktrf). Both companies have highly significant intercepts, indicating additional factors beyond the market, size, and value factors influencing stock returns. Diagnostic tests reveal autocorrelation and non-normal residuals in both cases, and heteroscedasticity for Disney only. Given the results, neither Disney nor GE seems to be a clear winner based on this analysis alone. Other factors, such as qualitative aspects of the companies, industry trends, and broader market conditions, should also be considered when making investment decisions. It’s essential to conduct a comprehensive analysis and consult with financial experts before making investment decisions.

Hypothesis Testing

Nsovo Ntuli

2024-05-18

ANALYZING THE DISNEY DATA

ANALYZING THE GENERAL ELECTRIC DATA