final

# Load necessary libraries  
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

# Placeholder datasets ----
# Simulated stand-ins that only mimic the column names used by the models
# below. Replace them with the actual datasets (or load them) when available.
#
# The RNG is seeded once so that every run draws the same data and the
# regression output further down is reproducible.
# NOTE(review): several columns that are later passed to log() (salary, sales,
# mktval, expendA, expendB) are normal draws, which are unbounded; an unlucky
# draw could be non-positive and produce NaN in the logged models.
set.seed(20250107)

sleep75 <- data.frame(
  sleep = rnorm(100, 3500, 200),
  totwrk = rnorm(100, 2000, 300),
  educ = rnorm(100, 12, 2),
  age = rnorm(100, 40, 10),
  male = sample(0:1, 100, replace = TRUE)
)

gpa2 <- data.frame(
  sat = rnorm(1000, 1000, 200),
  hsize = rnorm(1000, 5, 2),
  female = sample(0:1, 1000, replace = TRUE),
  black = sample(0:1, 1000, replace = TRUE)
)

vote1 <- data.frame(
  voteA = rnorm(500, 0.5, 0.1),
  prtystrA = rnorm(500, 0.4, 0.1),
  democA = rnorm(500, 0.6, 0.1),
  expendA = rnorm(500, 10, 2),
  expendB = rnorm(500, 12, 3)
)

fertil2 <- data.frame(
  children = rpois(200, 2),
  age = rnorm(200, 30, 5),
  educ = rnorm(200, 12, 3),
  electric = sample(0:1, 200, replace = TRUE),
  urban = sample(0:1, 200, replace = TRUE)
)

ceosal2 <- data.frame(
  salary = rnorm(177, 1000, 300),
  sales = rnorm(177, 200, 50),
  mktval = rnorm(177, 300, 100),
  profmarg = rnorm(177, 0.1, 0.02),
  ceoten = rnorm(177, 5, 2),
  comten = rnorm(177, 7, 3)
)

# `quarter` cycles 1, 2, 3, 4 across the 100 rows.
housing_data <- data.frame(
  housing_starts = rnorm(100, 500, 100),
  interest_rate = rnorm(100, 5, 1),
  income = rnorm(100, 30000, 5000),
  quarter = rep(1:4, length.out = 100)
)

# The "lag" columns are independent draws, not actual lags of `return`.
nyse <- data.frame(
  return = rnorm(100, 0.05, 0.02),
  return_lag1 = rnorm(100, 0.04, 0.02),
  return_lag2 = rnorm(100, 0.03, 0.02)
)

# Chapter 7 ----
# OLS of sleep on work time, education, a quadratic in age, and the male dummy.
model_sleep <- lm(
  sleep ~ totwrk + educ + age + I(age^2) + male,
  data = sleep75
)
model_sleep |> summary()

## 
## Call:
## lm(formula = sleep ~ totwrk + educ + age + I(age^2) + male, data = sleep75)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -504.85 -143.40   44.78  147.84  373.42 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3717.52828  277.70534  13.387   <2e-16 ***
## totwrk        -0.01574    0.06430  -0.245    0.807    
## educ          -9.01292    8.74778  -1.030    0.306    
## age           -3.02739   11.75843  -0.257    0.797    
## I(age^2)       0.02515    0.14306   0.176    0.861    
## male         -18.73560   38.67431  -0.484    0.629    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 191.2 on 94 degrees of freedom
## Multiple R-squared:  0.02089,    Adjusted R-squared:  -0.03119 
## F-statistic: 0.4011 on 5 and 94 DF,  p-value: 0.8469

# Chapter 8 ----
# Restricted model: log salary on log sales, log market value, profit margin,
# and CEO / company tenure entering linearly.
model_ff <- lm(
  log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + comten,
  data = ceosal2
)

# Extended model: the restricted model PLUS the squared tenure terms. The
# linear terms must stay in so the two models are nested; replacing them with
# the squares leaves both models with the same residual df, and anova() then
# reports Df = 0 with no F statistic.
model_ff_extended <- lm(
  log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + comten +
    I(ceoten^2) + I(comten^2),
  data = ceosal2
)

# Joint F-test of the two quadratic tenure terms (2 restrictions).
anova(model_ff, model_ff_extended)

## Analysis of Variance Table
## 
## Model 1: log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + 
##     comten
## Model 2: log(salary) ~ log(sales) + log(mktval) + profmarg + I(ceoten^2) + 
##     I(comten^2)
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1    171 15.064                      
## 2    171 15.096  0 -0.032135

# Chapter 9 ----
# Fit the vote-share model, then regress its residuals on the same regressors.
model_vote <- lm(
  voteA ~ prtystrA + democA + log(expendA) + log(expendB),
  data = vote1
)
residuals_vote <- residuals(model_vote)

# OLS residuals are orthogonal to the regressors that produced them, so every
# coefficient and the R-squared of this auxiliary regression are zero up to
# floating-point error.
lm(
  residuals_vote ~ prtystrA + democA + log(expendA) + log(expendB),
  data = vote1
) |>
  summary()

## 
## Call:
## lm(formula = residuals_vote ~ prtystrA + democA + log(expendA) + 
##     log(expendB), data = vote1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32242 -0.06739  0.00358  0.07051  0.32119 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)   9.574e-18  7.649e-02       0        1
## prtystrA     -1.273e-17  4.501e-02       0        1
## democA       -1.234e-17  4.682e-02       0        1
## log(expendA)  6.199e-20  2.278e-02       0        1
## log(expendB)  1.133e-18  1.688e-02       0        1
## 
## Residual standard error: 0.1034 on 495 degrees of freedom
## Multiple R-squared:  4.261e-33,  Adjusted R-squared:  -0.008081 
## F-statistic: 5.273e-31 on 4 and 495 DF,  p-value: 1

# Chapter 10 ----
# Linear time trend: 1, 2, ..., n in row order. seq_len() is used instead of
# 1:nrow() so an empty data frame yields an empty trend rather than c(1, 0).
housing_data$trend <- seq_len(nrow(housing_data))

# Housing starts on interest rate, income, the time trend, and quarter
# dummies (quarter 1 is the omitted base level).
model_housing <- lm(
  housing_starts ~ interest_rate + income + trend + factor(quarter),
  data = housing_data
)
summary(model_housing)

## 
## Call:
## lm(formula = housing_starts ~ interest_rate + income + trend + 
##     factor(quarter), data = housing_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -270.571  -78.119    8.047   69.237  291.480 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      431.519322  83.583266   5.163 1.38e-06 ***
## interest_rate      0.019746  11.386777   0.002   0.9986    
## income             0.001761   0.002190   0.804   0.4234    
## trend             -0.086307   0.392556  -0.220   0.8265    
## factor(quarter)2  34.686776  32.116945   1.080   0.2829    
## factor(quarter)3  57.190624  31.944598   1.790   0.0767 .  
## factor(quarter)4  -6.002638  31.898713  -0.188   0.8511    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111.7 on 93 degrees of freedom
## Multiple R-squared:  0.06353,    Adjusted R-squared:  0.003109 
## F-statistic: 1.051 on 6 and 93 DF,  p-value: 0.3975

# Chapter 11 ----
# Squared return regressed on the first lag, then on the first and second lag.
model_arch_1 <- lm(I(return^2) ~ return_lag1, data = nyse)

# Same specification with return_lag2 appended to the right-hand side.
model_arch_2 <- update(model_arch_1, . ~ . + return_lag2)

# F-test for the added second lag.
anova(model_arch_1, model_arch_2)

## Analysis of Variance Table
## 
## Model 1: I(return^2) ~ return_lag1
## Model 2: I(return^2) ~ return_lag1 + return_lag2
##   Res.Df        RSS Df  Sum of Sq      F Pr(>F)
## 1     98 0.00038458                            
## 2     97 0.00038220  1 2.3835e-06 0.6049 0.4386

# Chapter 12 ----
# Return regressed on its first lag and the square of that lag.
model_nyse <- lm(
  return ~ return_lag1 + I(return_lag1^2),
  data = nyse
)
model_nyse |> summary()

## 
## Call:
## lm(formula = return ~ return_lag1 + I(return_lag1^2), data = nyse)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.054929 -0.015140  0.001772  0.014387  0.038496 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.041510   0.006491   6.395 5.63e-09 ***
## return_lag1       0.382794   0.330016   1.160    0.249    
## I(return_lag1^2) -4.568070   4.027274  -1.134    0.259    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02083 on 97 degrees of freedom
## Multiple R-squared:  0.01381,    Adjusted R-squared:  -0.006523 
## F-statistic: 0.6792 on 2 and 97 DF,  p-value: 0.5094

final

2025-01-07