# Setup ----
# Packages this script relies on. readxl, ggplot2 and plotly are attached
# below; rmarkdown, knitr and htmlwidgets are only installed here (nothing in
# the visible script attaches them).
required_packages <- c(
  "readxl", "ggplot2", "plotly",
  "rmarkdown", "knitr", "htmlwidgets"
)

# Install only the packages that are missing, so re-running the script does
# not download and reinstall everything each time.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(readxl)
library(ggplot2)
library(plotly)
# Load data and fit the baseline model ----
# Read the "Data" sheet of the project workbook.
# NOTE(review): absolute, machine-specific path; adjust it when running on
# another computer.
data <- read_excel(
  "C:/Users/Admin/Desktop/R and packages/Data for the project.xlsx",
  sheet = "Data"
)

# Drop every row that has a missing value in any column. Doing this before
# fitting keeps the rows of `data` aligned one-to-one with the model's
# residuals, fitted values and Cook's distances used later.
# NOTE(review): this also drops rows whose only NA is in a column the model
# does not use; confirm that is intended.
data <- na.omit(data)

# Simple linear regression of GDP growth on FDI.
model <- lm(GDP_growth ~ FDI, data = data)
summary(model)

# Store the in-sample predictions next to the observations.
data$predicted <- predict(model)
# Scatter plot of the full data set with the fitted regression line.
ggplot(data, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) + # Scatter plot of original data
  geom_smooth(method = "lm", se = FALSE, color = "red") + # Regression line
  labs(
    title = "Linear Regression: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()
# Outlier detection via Cook's distance ----
# An observation is treated as an outlier when its Cook's distance exceeds 1.
cooks_distance <- cooks.distance(model)
data$outlier_dummy <- ifelse(cooks_distance > 1, 1, 0)

# Observations kept for the outlier-free analysis.
data_no_outliers <- data[cooks_distance <= 1, ]

# Same scatter plot as for the full data, restricted to non-outliers.
# geom_smooth() refits the regression line on this subset.
ggplot(data_no_outliers, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) + # Scatter plot of no-outlier data
  geom_smooth(method = "lm", se = FALSE, color = "red") + # Regression line
  labs(
    title = "Linear Regression: GDP Growth vs FDI (No Outliers)",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Observations with Cook's distance > 1
excluded_data <- data[cooks_distance > 1, ]
percentage_excluded <- nrow(excluded_data) / nrow(data) * 100

# Each message ends with a newline so the two messages and the printed table
# do not run together on one console line.
cat(
  "Percentage of data excluded due to outliers:",
  round(percentage_excluded, 2), "%\n"
)
cat("Excluded Observations:\n")
# Adjust columns as necessary
print(excluded_data[, c("Country", "FDI", "GDP_growth")])
# Residual diagnostics for the non-outlier observations ----
# NOTE(review): the residuals and fitted values are taken from `model`, which
# was fitted on the data including outliers, and are only subset here. If the
# diagnostics should describe the outlier-free fit, refit lm() on
# data_no_outliers first.
data_no_outliers$residuals <- residuals(model)[cooks_distance <= 1]
data_no_outliers$fitted <- fitted(model)[cooks_distance <= 1]

# Linearity: scatter plot with a regression line fitted to the subset.
ggplot(data_no_outliers, aes(x = FDI, y = GDP_growth)) +
  geom_point(color = "blue", alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Linearity Check: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Normality: histogram of the residuals.
ggplot(data_no_outliers, aes(x = residuals)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  labs(
    title = "Histogram of Residuals (No Outliers)",
    x = "Residuals",
    y = "Frequency"
  ) +
  theme_minimal()

# Homoscedasticity: residuals against fitted values, with a zero reference
# line.
ggplot(data_no_outliers, aes(x = fitted, y = residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuals vs Fitted Values",
    x = "Fitted values",
    y = "Residuals"
  ) +
  theme_minimal()

# Normality: Q-Q plot of the residuals with a reference line.
qqnorm(data_no_outliers$residuals)
qqline(data_no_outliers$residuals, col = "red")
# Interactive (plotly) versions of the two scatter plots ----
plot_list <- list()

# The `text` aesthetic (used only for the plotly tooltip) is mapped inside
# geom_point() rather than in the global aes(). A discrete variable in the
# global mapping becomes part of the default grouping, which would make
# geom_smooth() fit one line per country instead of a single regression line.
# ggplot2 may warn "Ignoring unknown aesthetics: text"; that is expected,
# because the aesthetic is consumed by ggplotly(), not by ggplot2.
#
# The outlier flag is turned into a factor with explicit levels and labels so
# the colours and legend text are matched by name, not by position. This
# stays correct when only one of the two categories is present.
plot_list[["Original"]] <- ggplot(data, aes(x = FDI, y = GDP_growth)) +
  geom_point(
    aes(
      color = factor(
        outlier_dummy,
        levels = c(0, 1),
        labels = c("Regular", "Outlier")
      ),
      text = paste("Country:", Country)
    ),
    size = 3,
    alpha = 0.7
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  scale_color_manual(
    values = c("Regular" = "black", "Outlier" = "red"),
    name = "Outlier Status"
  ) +
  labs(
    title = "Original Data: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# This plot also maps `text`, so that tooltip = "text" below has something to
# show when hovering over its points.
plot_list[["No Outliers"]] <- ggplot(
  data_no_outliers,
  aes(x = FDI, y = GDP_growth)
) +
  geom_point(
    aes(text = paste("Country:", Country)),
    color = "black",
    size = 3,
    alpha = 0.7
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Data Without Outliers: GDP Growth vs FDI",
    x = "FDI",
    y = "GDP Growth"
  ) +
  theme_minimal()

# Convert each ggplot to a plotly widget whose tooltip shows only `text`.
interactive_plots <- lapply(plot_list, ggplotly, tooltip = "text")

interactive_plots[["Original"]]
interactive_plots[["No Outliers"]]