# Install (only when missing) and load necessary packages

# Packages this script expects to be installed. Only readxl, ggplot2 and
# plotly are attached below; the rest are installed but not attached here.
required_packages <- c(
  "readxl", "ggplot2", "plotly",
  "rmarkdown", "knitr", "htmlwidgets"
)

# Re-installing on every run is slow and needs network access, so install
# only the packages that cannot be found.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(readxl)
library(ggplot2)
library(plotly)

# Load the data

# NOTE(review): absolute, machine-specific path; the script only runs where
# this workbook exists at exactly this location.
data <- read_excel(
  "C:/Users/Admin/Desktop/R and packages/Data for the project.xlsx",
  sheet = "Data"
)

# Clean data by removing missing values

# na.omit() drops every row that has an NA in any column, not only in the
# columns used by the model.
data <- na.omit(data)

# Fit the initial linear regression model

model <- lm(GDP_growth ~ FDI, data = data)

# Summary of the initial model

summary(model)

# Add predicted values to the data for visualization

# predict() without `newdata` returns the fitted values for the rows used in
# the fit, so the result lines up row-for-row with `data`.
data$predicted <- predict(model)

# Plot the original data and regression line

ggplot(data, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) + # Scatter plot of original data
  geom_smooth(method = "lm", se = FALSE, color = "red") + # Regression line
  labs(
    title = "Linear Regression: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Calculate Cook's Distance to identify outliers

cooks_distance <- cooks.distance(model)

# Flag observations whose Cook's Distance exceeds 1 (1 = outlier, 0 = regular).
data$outlier_dummy <- ifelse(cooks_distance > 1, 1, 0)

# Data without outliers

data_no_outliers <- data[cooks_distance <= 1, ]

# Plot regression without outliers

ggplot(data_no_outliers, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) + # Scatter plot of no-outlier data
  geom_smooth(method = "lm", se = FALSE, color = "red") + # Regression line
  labs(
    title = "Linear Regression: GDP Growth vs FDI (No Outliers)",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Summarize exclusions

# Observations with Cook's Distance > 1
excluded_data <- data[cooks_distance > 1, ]

# Share of all (NA-free) observations that were excluded, in percent.
percentage_excluded <- nrow(excluded_data) / nrow(data) * 100

# ------------------------- Validating Assumptions ----------------

# Add residuals and fitted values to data without outliers

# NOTE(review): these values come from `model`, which was fitted on the full
# data (outliers included); they are only subset to the retained rows, the
# model is not refitted on `data_no_outliers`. Confirm this is intended.
data_no_outliers$residuals <- residuals(model)[cooks_distance <= 1]
data_no_outliers$fitted <- fitted(model)[cooks_distance <= 1]

# (1) Linearity: Visual inspection via scatter plots

ggplot(data_no_outliers, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Linearity Check: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# (2) Independence: Histogram of residuals

# NOTE(review): a histogram shows the distribution of the residuals; on its
# own it does not assess independence. Confirm which check is intended here.
# The bin width is fixed at 1 in the units of the residuals.
ggplot(data_no_outliers, aes(x = residuals)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(
    title = "Histogram of Residuals (No Outliers)",
    x = "Residuals",
    y = "Frequency"
  ) +
  theme_minimal()

# (3) Homoscedasticity: Residuals vs fitted values

ggplot(data_no_outliers, aes(x = fitted, y = residuals)) +
  geom_point() +
  # Zero reference line: residuals should scatter evenly around it.
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuals vs Fitted Values",
    x = "Fitted values",
    y = "Residuals"
  ) +
  theme_minimal()

# (4) Normality: Q-Q plot

# Base-graphics Q-Q plot of the residuals with a red reference line.
qqnorm(data_no_outliers$residuals)
qqline(data_no_outliers$residuals, col = "red")

# ------------------------- Interactive Plots ----------------

plot_list <- list()

# Original data with outliers highlighted

# `text` is not drawn by ggplot2; it is carried along so that ggplotly() can
# use it as the hover tooltip.
plot_list[["Original"]] <- ggplot(
  data,
  aes(x = FDI, y = GDP_growth, text = paste("Country:", Country))
) +
  geom_point(aes(color = as.factor(outlier_dummy)), size = 3, alpha = 0.7) +
  # The discrete `text` aesthetic would otherwise make every row its own
  # group, leaving geom_smooth() with nothing to fit; force a single group.
  geom_smooth(aes(group = 1), method = "lm", se = FALSE, color = "blue") +
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    name = "Outlier Status",
    # Named labels are matched by level, so the scale still works when only
    # one of the two levels is present (e.g. no observation is flagged).
    labels = c("0" = "Regular", "1" = "Outlier")
  ) +
  labs(
    title = "Original Data: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Data without outliers

# Carries the same `text` aesthetic as the "Original" plot so that
# ggplotly(tooltip = "text") has a tooltip to show for this plot as well.
plot_list[["No Outliers"]] <- ggplot(
  data_no_outliers,
  aes(x = FDI, y = GDP_growth, text = paste("Country:", Country))
) +
  geom_point(color = "black", size = 3, alpha = 0.7) +
  # Force a single group so the discrete `text` aesthetic does not split the
  # data into one group per row before the regression line is fitted.
  geom_smooth(aes(group = 1), method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Data Without Outliers: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Convert each plot to interactive

# `tooltip = "text"` restricts the hover box to the `text` aesthetic.
interactive_plots <- lapply(plot_list, ggplotly, tooltip = "text")

# Display interactive plots

interactive_plots[["Original"]]
interactive_plots[["No Outliers"]]