# Install and load necessary packages ----
# readxl/ggplot2/plotly are attached below; rmarkdown/knitr/htmlwidgets are
# only installed (they are needed for rendering, not called directly here).
required_packages <- c(
  "readxl", "ggplot2", "plotly",
  "rmarkdown", "knitr", "htmlwidgets"
)

# Install only what is missing so re-running the script does not reinstall
# (and potentially upgrade) every package each time.
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

library(readxl)
library(ggplot2)
library(plotly)

# Load the data ----
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
data <- read_excel(
  "C:/Users/Admin/Desktop/R and packages/Data for the project.xlsx",
  sheet = "Data"
)

# Clean data by removing missing values.
# na.omit() drops every row that has an NA in *any* column of the sheet.
data <- na.omit(data)

# Fit the initial linear regression model (GDP_growth explained by FDI)
model <- lm(GDP_growth ~ FDI, data = data)

# Summary of the initial model
summary(model)

# Add predicted values to the data for visualization.
# predict() without newdata returns the fitted values for the rows used in
# the fit.
data$predicted <- predict(model)

# Plot the original data and regression line
ggplot(data, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) + # Scatter plot of original data
  geom_smooth(method = "lm", se = FALSE, color = "red") + # Regression line
  labs(
    title = "Linear Regression: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Calculate Cook's Distance to identify outliers.
# An observation is flagged (outlier_dummy == 1) when its Cook's distance in
# the initial model exceeds 1; the same cut-off is used for the split below.
cooks_distance <- cooks.distance(model)
data$outlier_dummy <- as.integer(cooks_distance > 1)

# Data without outliers (rows with Cook's distance <= 1)
data_no_outliers <- data[cooks_distance <= 1, ]

# Plot regression without outliers
ggplot(data_no_outliers, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) + # Scatter plot of no-outlier data
  geom_smooth(method = "lm", se = FALSE, color = "red") + # Regression line
  labs(
    title = "Linear Regression: GDP Growth vs FDI (No Outliers)",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Summarize exclusions
# Observations with Cook's Distance > 1
excluded_data <- data[cooks_distance > 1, ]
# Share of all (NA-free) rows that were flagged, in percent
percentage_excluded <- nrow(excluded_data) / nrow(data) * 100

# Print summary of exclusions.
# Each message ends with a newline so the lines do not run together on the
# console.
cat(
  "Percentage of data excluded due to outliers:",
  round(percentage_excluded, 2), "%\n"
)
cat("Excluded Observations:\n")
# Adjust columns as necessary
print(excluded_data[, c("Country", "FDI", "GDP_growth")])

# ------------------------- Validating Assumptions ----------------

# Add residuals and fitted values to data without outliers.
# The diagnostics below are about the regression on the cleaned data, so the
# model is refit on data_no_outliers. Slicing residuals out of the initial
# model would describe a fit that still includes the excluded observations
# and would not match the line drawn by geom_smooth() in the no-outlier plots.
model_no_outliers <- lm(GDP_growth ~ FDI, data = data_no_outliers)
data_no_outliers$residuals <- residuals(model_no_outliers)
data_no_outliers$fitted <- fitted(model_no_outliers)

# (1) Linearity: Visual inspection via scatter plots
ggplot(data_no_outliers, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Linearity Check: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# (2) Independence: Histogram of residuals
# NOTE(review): a histogram shows the shape of the residual distribution; on
# its own it does not assess independence. Consider adding a residuals-vs-
# observation-order plot or a Durbin-Watson test if independence matters.
# binwidth = 1 is in the units of GDP_growth.
ggplot(data_no_outliers, aes(x = residuals)) +
  geom_histogram(
    binwidth = 1,
    fill = "blue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram of Residuals (No Outliers)",
    x = "Residuals",
    y = "Frequency"
  ) +
  theme_minimal()

# (3) Homoscedasticity: Residuals vs fitted values
# The dashed red line marks zero residual.
ggplot(data_no_outliers, aes(x = fitted, y = residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuals vs Fitted Values",
    x = "Fitted values",
    y = "Residuals"
  ) +
  theme_minimal()

# (4) Normality: Q-Q plot
# qqnorm() draws the plot; qqline() adds the reference line to it, so the two
# calls must run as separate statements in this order.
qqnorm(data_no_outliers$residuals)
qqline(data_no_outliers$residuals, col = "red")

# ------------------------- Interactive Plots ----------------

plot_list <- list()

# Original data with outliers highlighted.
# The hover text is mapped on geom_point() only. If `text` sits in the global
# aes(), geom_smooth() inherits it and ggplot2 groups by that discrete value,
# so no single overall regression line is fitted. ggplot2 warns about the
# unknown `text` aesthetic; that is expected, ggplotly() consumes it.
# Colour labels are named so they stay attached to the right class even when
# only one class is present in the data.
plot_list[["Original"]] <- ggplot(data, aes(x = FDI, y = GDP_growth)) +
  geom_point(
    aes(
      color = as.factor(outlier_dummy),
      text = paste("Country:", Country)
    ),
    size = 3,
    alpha = 0.7
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    name = "Outlier Status",
    labels = c("0" = "Regular", "1" = "Outlier")
  ) +
  labs(
    title = "Original Data: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Data without outliers.
# The points carry the same "Country: ..." hover text as the "Original" plot;
# the plots are converted with tooltip = "text", so without a `text`
# aesthetic this plot would have nothing to show on hover.
plot_list[["No Outliers"]] <- ggplot(
  data_no_outliers,
  aes(x = FDI, y = GDP_growth)
) +
  geom_point(
    aes(text = paste("Country:", Country)),
    color = "black",
    size = 3,
    alpha = 0.7
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Data Without Outliers: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Convert each plot to interactive.
# The result keeps the names of plot_list; hover shows only the `text`
# aesthetic of each plot.
interactive_plots <- lapply(
  plot_list,
  function(static_plot) ggplotly(static_plot, tooltip = "text")
)

# Display interactive plots (one expression per line so each is printed)
interactive_plots[["Original"]]
interactive_plots[["No Outliers"]]

# Final project
#
# Outline of the script:
#
# Install and load necessary packages
#
# Load the data
#
# Clean data by removing missing values
#
# Fit the initial linear regression model
#
# Summary of the initial model
#
# Add predicted values to the data for visualization
#
# Plot the original data and regression line
#
# Calculate Cook's Distance to identify outliers
#
# Data without outliers
#
# Plot regression without outliers
#
# Summarize exclusions
#
# Print summary of exclusions
#
# ------------------------- Validating Assumptions ----------------
#
# Add residuals and fitted values to data without outliers
#
# (1) Linearity: Visual inspection via scatter plots
#
# (2) Independence: Histogram of residuals
#
# (3) Homoscedasticity: Residuals vs fitted values
#
# (4) Normality: Q-Q plot
#
# ------------------------- Interactive Plots ----------------
#
# Original data with outliers highlighted
#
# Data without outliers
#
# Convert each plot to interactive
#
# Display interactive plots