# Setup ----
# Install only the packages that are not already available, so re-running the
# script does not reinstall everything each time.
required_pkgs <- c("ggplot2", "MASS", "lmtest", "car")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(ggplot2) # For visualization
library(MASS)    # For boxcox
library(lmtest)  # For Breusch-Pagan test
library(car)     # For qqPlot and additional diagnostics if needed
# Simulate firm data ----
# Fixed seed so the simulated sample is reproducible.
set.seed(42)

n <- 200
firm_size <- runif(n, 10, 500) # Total Assets in millions

# R&D expenditure is log-linear in firm size with multiplicative
# (log-normal) noise, so the raw response is right-skewed.
error <- rnorm(n, mean = 0, sd = 0.5)
rd_expenditure <- exp(1.5 + 0.6 * log(firm_size) + error)

df_firms <- data.frame(
  Firm_ID = seq_len(n),
  Total_Assets = firm_size,
  RD_Expenditure = rd_expenditure
)

head(df_firms)
# Scatter plot of R&D expenditure against total assets ----
ggplot(df_firms, aes(x = Total_Assets, y = RD_Expenditure)) +
  geom_point() +
  # Add linear fit line; the formula is spelled out to avoid the
  # "using formula" message geom_smooth() emits otherwise.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  labs(
    title = "Relationship between Total Assets and R&D Expenditure",
    x = "Total Assets (millions)",
    y = "R&D Expenditure"
  ) +
  theme_bw()
# Baseline linear model on the untransformed response ----
model <- lm(RD_Expenditure ~ Total_Assets, data = df_firms)
summary(model)

# Normality of residuals: Q-Q plot and Shapiro-Wilk test.
qqPlot(model, main = "Q-Q Plot of Residuals")
shapiro.test(residuals(model))

# Constant variance: residuals-vs-fitted plot and Breusch-Pagan test.
plot(model, which = 1, main = "Residuals vs. Fitted")
bptest(model)
# Box-Cox transformation ----
# Profile the Box-Cox log-likelihood over lambda in [-2, 2].
bc <- boxcox(model, lambda = seq(-2, 2, by = 1 / 10))

# Pick the lambda with the highest profile log-likelihood.
optimal_lambda <- bc$x[which.max(bc$y)]
optimal_lambda

# Apply the transform: log(y) when lambda is zero, (y^lambda - 1) / lambda
# otherwise. Lambda comes from a floating-point grid, so it is compared to
# zero with a tolerance rather than with `==`.
lambda_is_zero <- abs(optimal_lambda) < sqrt(.Machine$double.eps)
if (lambda_is_zero) {
  df_firms$RD_Transformed <- log(df_firms$RD_Expenditure)
} else {
  df_firms$RD_Transformed <-
    (df_firms$RD_Expenditure^optimal_lambda - 1) / optimal_lambda
}
# Linear model on the Box-Cox-transformed response ----
model_transformed <- lm(RD_Transformed ~ Total_Assets, data = df_firms)
summary(model_transformed)

# Repeat the normality checks on the transformed model's residuals.
qqPlot(model_transformed, main = "Q-Q Plot of Transformed Residuals")
shapiro.test(residuals(model_transformed))

# Repeat the constant-variance checks on the transformed model.
plot(model_transformed, which = 1, main = "Transformed Residuals vs. Fitted")
bptest(model_transformed)