# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Placeholder datasets
# Simulated stand-ins so the script runs end to end; replace these with the
# actual datasets (or load them) if available.
#
# The seed is fixed so that every run draws the same placeholder data and the
# model output further down is reproducible from run to run.
#
# NOTE: salary, sales, mktval, expendA and expendB are passed through log()
# later in this script. Normal draws are not guaranteed to be positive, so a
# stray non-positive value would become NaN/-Inf there.
set.seed(42)

sleep75 <- data.frame(
  sleep = rnorm(100, 3500, 200),
  totwrk = rnorm(100, 2000, 300),
  educ = rnorm(100, 12, 2),
  age = rnorm(100, 40, 10),
  male = sample(0:1, 100, replace = TRUE)
)

gpa2 <- data.frame(
  sat = rnorm(1000, 1000, 200),
  hsize = rnorm(1000, 5, 2),
  female = sample(0:1, 1000, replace = TRUE),
  black = sample(0:1, 1000, replace = TRUE)
)

vote1 <- data.frame(
  voteA = rnorm(500, 0.5, 0.1),
  prtystrA = rnorm(500, 0.4, 0.1),
  democA = rnorm(500, 0.6, 0.1),
  expendA = rnorm(500, 10, 2),
  expendB = rnorm(500, 12, 3)
)

fertil2 <- data.frame(
  children = rpois(200, 2),
  age = rnorm(200, 30, 5),
  educ = rnorm(200, 12, 3),
  electric = sample(0:1, 200, replace = TRUE),
  urban = sample(0:1, 200, replace = TRUE)
)

ceosal2 <- data.frame(
  salary = rnorm(177, 1000, 300),
  sales = rnorm(177, 200, 50),
  mktval = rnorm(177, 300, 100),
  profmarg = rnorm(177, 0.1, 0.02),
  ceoten = rnorm(177, 5, 2),
  comten = rnorm(177, 7, 3)
)

# quarter cycles 1, 2, 3, 4, 1, ... across the 100 rows.
housing_data <- data.frame(
  housing_starts = rnorm(100, 500, 100),
  interest_rate = rnorm(100, 5, 1),
  income = rnorm(100, 30000, 5000),
  quarter = rep(1:4, length.out = 100)
)

# The "lag" columns are independent draws, not actual lags of `return`.
nyse <- data.frame(
  return = rnorm(100, 0.05, 0.02),
  return_lag1 = rnorm(100, 0.04, 0.02),
  return_lag2 = rnorm(100, 0.03, 0.02)
)
# Chapter 7
# Sleep equation: sleep regressed on work time, education, a quadratic in age,
# and the male dummy.
model_sleep <- lm(
  formula = sleep ~ totwrk + educ + age + I(age^2) + male,
  data = sleep75
)
print(summary(model_sleep))
##
## Call:
## lm(formula = sleep ~ totwrk + educ + age + I(age^2) + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -504.85 -143.40 44.78 147.84 373.42
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3717.52828 277.70534 13.387 <2e-16 ***
## totwrk -0.01574 0.06430 -0.245 0.807
## educ -9.01292 8.74778 -1.030 0.306
## age -3.02739 11.75843 -0.257 0.797
## I(age^2) 0.02515 0.14306 0.176 0.861
## male -18.73560 38.67431 -0.484 0.629
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 191.2 on 94 degrees of freedom
## Multiple R-squared: 0.02089, Adjusted R-squared: -0.03119
## F-statistic: 0.4011 on 5 and 94 DF, p-value: 0.8469
# Chapter 8
# Baseline CEO salary model: log salary on log sales, log market value,
# profit margin, and CEO / company tenure in levels.
model_ff <- lm(
  log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + comten,
  data = ceosal2
)

# Extended model: keeps the linear tenure terms and ADDS their squares, so
# model_ff is nested inside it. anova() can only form an F test between nested
# models; swapping the linear terms for squares leaves both models with the
# same residual df and yields no test statistic.
model_ff_extended <- lm(
  log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + comten +
    I(ceoten^2) + I(comten^2),
  data = ceosal2
)

# Joint F test of the two quadratic tenure terms (2 numerator df).
anova(model_ff, model_ff_extended)
# The previously recorded table (Df = 0, negative sum of squares, no F
# statistic) came from comparing two non-nested models and no longer applies;
# re-run this section to regenerate the output.
# Chapter 9
# Vote-share model for candidate A.
model_vote <- lm(
  formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB),
  data = vote1
)

# residuals() yields one value per row used in the fit; the auxiliary
# regression below assumes no rows of vote1 were dropped.
residuals_vote <- residuals(model_vote)

# Regress the OLS residuals on the very same regressors. OLS residuals are
# orthogonal to the regressors by construction, so every coefficient and the
# R-squared come out as zero up to floating-point error.
print(summary(lm(
  formula = residuals_vote ~ prtystrA + democA + log(expendA) + log(expendB),
  data = vote1
)))
##
## Call:
## lm(formula = residuals_vote ~ prtystrA + democA + log(expendA) +
## log(expendB), data = vote1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32242 -0.06739 0.00358 0.07051 0.32119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.574e-18 7.649e-02 0 1
## prtystrA -1.273e-17 4.501e-02 0 1
## democA -1.234e-17 4.682e-02 0 1
## log(expendA) 6.199e-20 2.278e-02 0 1
## log(expendB) 1.133e-18 1.688e-02 0 1
##
## Residual standard error: 0.1034 on 495 degrees of freedom
## Multiple R-squared: 4.261e-33, Adjusted R-squared: -0.008081
## F-statistic: 5.273e-31 on 4 and 495 DF, p-value: 1
# Chapter 10
# Linear time trend: 1, 2, ..., n. seq_len() is used instead of 1:nrow(...)
# because 1:0 would yield c(1, 0) for an empty data frame.
housing_data$trend <- seq_len(nrow(housing_data))

# Housing starts on interest rate, income, the time trend, and quarterly
# dummies (quarter 1 is the reference level of factor(quarter)).
model_housing <- lm(
  housing_starts ~ interest_rate + income + trend + factor(quarter),
  data = housing_data
)
summary(model_housing)
##
## Call:
## lm(formula = housing_starts ~ interest_rate + income + trend +
## factor(quarter), data = housing_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -270.571 -78.119 8.047 69.237 291.480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 431.519322 83.583266 5.163 1.38e-06 ***
## interest_rate 0.019746 11.386777 0.002 0.9986
## income 0.001761 0.002190 0.804 0.4234
## trend -0.086307 0.392556 -0.220 0.8265
## factor(quarter)2 34.686776 32.116945 1.080 0.2829
## factor(quarter)3 57.190624 31.944598 1.790 0.0767 .
## factor(quarter)4 -6.002638 31.898713 -0.188 0.8511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 111.7 on 93 degrees of freedom
## Multiple R-squared: 0.06353, Adjusted R-squared: 0.003109
## F-statistic: 1.051 on 6 and 93 DF, p-value: 0.3975
# Chapter 11
# Squared return regressed on the first lag only ...
model_arch_1 <- lm(
  formula = I(return^2) ~ return_lag1,
  data = nyse
)
# ... and on the first and second lags.
model_arch_2 <- lm(
  formula = I(return^2) ~ return_lag1 + return_lag2,
  data = nyse
)
# Nested-model F test: does the second lag add explanatory power?
print(anova(model_arch_1, model_arch_2))
## Analysis of Variance Table
##
## Model 1: I(return^2) ~ return_lag1
## Model 2: I(return^2) ~ return_lag1 + return_lag2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 98 0.00038458
## 2 97 0.00038220 1 2.3835e-06 0.6049 0.4386
# Chapter 12
# Return regressed on a quadratic in its first lag.
model_nyse <- lm(
  formula = return ~ return_lag1 + I(return_lag1^2),
  data = nyse
)
print(summary(model_nyse))
##
## Call:
## lm(formula = return ~ return_lag1 + I(return_lag1^2), data = nyse)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.054929 -0.015140 0.001772 0.014387 0.038496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.041510 0.006491 6.395 5.63e-09 ***
## return_lag1 0.382794 0.330016 1.160 0.249
## I(return_lag1^2) -4.568070 4.027274 -1.134 0.259
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02083 on 97 degrees of freedom
## Multiple R-squared: 0.01381, Adjusted R-squared: -0.006523
## F-statistic: 0.6792 on 2 and 97 DF, p-value: 0.5094