Load Libraries
library(ggplot2)
library(MASS)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
Simulate Data
set.seed(42)
n <- 200
firm_size <- runif(n, 10, 500)
error <- rnorm(n, mean = 0, sd = 0.5)
rd_expenditure <- exp(1.5 + 0.6 * log(firm_size) + error)
df_firms <- data.frame(
Firm_ID = 1:n,
Total_Assets = firm_size,
RD_Expenditure = rd_expenditure
)
head(df_firms)
## Firm_ID Total_Assets RD_Expenditure
## 1 1 458.2550 322.76939
## 2 2 469.1670 302.76313
## 3 3 150.2084 54.90529
## 4 4 416.9193 421.56611
## 5 5 324.4553 103.12089
## 6 6 264.3570 134.17397
1. Visualize the Relationship
ggplot(df_firms, aes(x = Total_Assets, y = RD_Expenditure)) +
geom_point(alpha = 0.6) +
labs(title = "R&D Expenditure vs Total Assets",
x = "Total Assets (Millions)",
y = "R&D Expenditure") +
theme_minimal()

2. OLS Regression
model1 <- lm(RD_Expenditure ~ Total_Assets, data = df_firms)
summary(model1)
##
## Call:
## lm(formula = RD_Expenditure ~ Total_Assets, data = df_firms)
##
## Residuals:
## Min 1Q Median 3Q Max
## -135.79 -42.06 -12.37 25.08 404.97
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.50788 11.25914 3.598 0.000405 ***
## Total_Assets 0.35091 0.03731 9.405 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 75.31 on 198 degrees of freedom
## Multiple R-squared: 0.3088, Adjusted R-squared: 0.3053
## F-statistic: 88.46 on 1 and 198 DF, p-value: < 2.2e-16
3. Residual Diagnostics
par(mfrow = c(2, 2))
plot(model1)

jarque.bera.test(residuals(model1))
##
## Jarque Bera Test
##
## data: residuals(model1)
## X-squared = 332.83, df = 2, p-value < 2.2e-16
bptest(model1)
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 13.298, df = 1, p-value = 0.0002657
6. Diagnostics for New Model
par(mfrow = c(2, 2))
plot(model2)

7. Compare Models
AIC(model1, model2)
## df AIC
## model1 3 2300.2201
## model2 3 320.8362