This report analyzes the relationship between firm size (Total Assets) and R&D expenditure using simulated firm-level data. We visualize the relationship, fit an OLS regression, test the residuals for normality and homoscedasticity, use a Box-Cox analysis to choose a transformation, and refit the model on log-transformed variables.
# Install any missing packages, then attach them.
# requireNamespace() only checks availability (it does not attach), so a
# failed install surfaces as an error from library() below instead of being
# silently ignored.
required_pkgs <- c("ggplot2", "MASS", "car")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(ggplot2)
library(MASS)
library(car)

# Set seed for reproducibility
set.seed(42)
# Simulate a cross-section of firms in which R&D expenditure is log-linear
# in firm size.
n <- 200
firm_size <- runif(n, 10, 500)  # total assets, uniform on [10, 500]

# Noise enters on the log scale:
#   log(R&D) = 1.5 + 0.6 * log(size) + error,  error ~ N(0, 0.5^2)
error <- rnorm(n, mean = 0, sd = 0.5)
rd_expenditure <- exp(1.5 + 0.6 * log(firm_size) + error)

# One row per firm.
df_firms <- data.frame(
  Firm_ID = seq_len(n),
  Total_Assets = firm_size,
  RD_Expenditure = rd_expenditure
)
# Preview the first rows of the data.
head(df_firms)

# Scatter plot of R&D expenditure against total assets with a fitted
# straight line (lm smoother).
ggplot(df_firms, aes(x = Total_Assets, y = RD_Expenditure)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    title = "Relationship between Firm Size and R&D Expenditure",
    x = "Total Assets",
    y = "R&D Expenditure"
  ) +
  theme_minimal()

# Baseline OLS regression in levels.
model1 <- lm(RD_Expenditure ~ Total_Assets, data = df_firms)
summary(model1)

# Diagnostic plots for model1 in a 2 x 2 grid; the previous graphics
# settings are saved and restored so later plots are not drawn in a
# quarter-size panel.
old_par <- par(mfrow = c(2, 2))
plot(model1)
par(old_par)

# Residual diagnostics: Shapiro-Wilk test on the residuals and car's
# ncvTest() on the fitted model.
shapiro.test(residuals(model1))
ncvTest(model1)

# Box-Cox profile (MASS) for the response of model1.
boxcox(model1)

# Log-transform both variables and refit as a log-log model.
df_firms$log_RD <- log(df_firms$RD_Expenditure)
df_firms$log_assets <- log(df_firms$Total_Assets)

model2 <- lm(log_RD ~ log_assets, data = df_firms)
summary(model2)

# Diagnostic plots for the refitted model, again restoring the layout.
old_par <- par(mfrow = c(2, 2))
plot(model2)
par(old_par)