library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Load and prepare the data set.
# Warnings are deliberately NOT silenced globally (no `options(warn = -1)`),
# so deprecation and numerical warnings raised by later steps stay visible.
# NOTE(review): absolute, machine-specific path -- consider a project-relative
# path so the script runs on other machines.
df <- read.csv("C:\\Users\\My Computer\\Downloads\\Economic Growth.csv")

# Drop the X, date and country columns; the remaining columns are the
# variables used in the analysis below.
df <- subset(df, select = -c(X, date, country))

# Pick one row at random (seeded for reproducibility) and replace its values
# with their absolute values. The row is drawn from the actual number of rows
# instead of a hard-coded 1:1000, so the index can never point past the data.
set.seed(123)
i <- sample.int(nrow(df), 1)
df[i, ] <- abs(df[i, ])
head(df)
## Human.Development.Index.Growth International.Tourist.Growth
## 1 0.83378549 -0.04651097
## 2 -0.43023586 2.39537893
## 3 0.65689481 -0.98270992
## 4 -0.07013110 0.06755870
## 5 0.28788931 -0.92150488
## 6 0.01185084 -0.70870233
## Export.growth.rate Tax.income.growth Percentage.Of.Poor.People
## 1 0.10839762 -0.7411865 -0.63373936
## 2 -0.31291181 0.7775949 -0.17610400
## 3 0.29350241 1.2348451 0.04919546
## 4 0.86092364 0.5579628 -0.29807123
## 5 0.07368099 0.8168575 1.24558545
## 6 -1.05886704 -0.2444449 1.12589177
## Economic.Growth
## 1 0.2274708
## 2 2.0690134
## 3 0.8734308
## 4 0.8156018
## 5 0.4168792
## 6 -0.9191443
# Per-column minimum, quartiles, median, mean and maximum of the prepared data.
summary(df)
## Human.Development.Index.Growth International.Tourist.Growth Export.growth.rate
## Min. :-3.03392 Min. :-2.711543 Min. :-3.52625
## 1st Qu.:-0.63974 1st Qu.:-0.745236 1st Qu.:-0.61698
## Median : 0.06965 Median : 0.006973 Median : 0.06944
## Mean : 0.03943 Mean :-0.013333 Mean : 0.04947
## 3rd Qu.: 0.70456 3rd Qu.: 0.686762 3rd Qu.: 0.72964
## Max. : 2.91650 Max. : 3.270161 Max. : 3.61119
## Tax.income.growth Percentage.Of.Poor.People Economic.Growth
## Min. :-3.63965 Min. :-2.91463 Min. :-4.2873
## 1st Qu.:-0.68286 1st Qu.:-0.73222 1st Qu.:-0.9313
## Median :-0.08024 Median :-0.03698 Median : 0.1393
## Mean :-0.03308 Mean :-0.02165 Mean : 0.1223
## 3rd Qu.: 0.63652 3rd Qu.: 0.64196 3rd Qu.: 1.1111
## Max. : 3.86947 Max. : 4.03960 Max. : 4.6686
# Split the data into the predictor columns (X) and the response column (Y);
# both stay data frames.
X <- dplyr::select(df, -Economic.Growth)
Y <- dplyr::select(df, Economic.Growth)

# Horizontal boxplot of the response variable.
ggplot(data = df, mapping = aes(x = Economic.Growth)) +
  geom_boxplot(color = "orange3", fill = "orange") +
  labs(
    title = "Boxplot of Economic Growth",
    x = "value",
    y = "Economic Growth"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.43))

# Histogram of the response on the density scale with a kernel density curve
# drawn on top. `after_stat(density)` is used instead of the `..density..`
# notation, which is deprecated since ggplot2 3.4.0.
ggplot(df, aes(x = Economic.Growth)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, fill = "orange", color = "black"
  ) +
  geom_density(alpha = 0.3, color = "red3") +
  labs(
    title = "Histogram of Economic Growth with Density Curve",
    x = "Economic Growth",
    y = "Density"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Pairwise plot matrix across all columns of the data.
GGally::ggpairs(data = df)

# Scatter plots of each predictor (X1..X5) against the response (Y) on a
# 2 x 3 grid. The panels are generated in a loop so every title matches its
# predictor (the fifth panel is "X5 vs Y"), and the previous graphics layout
# is restored afterwards so later plots do not land in the unused sixth cell.
predictors <- c(
  "Human.Development.Index.Growth",
  "International.Tourist.Growth",
  "Export.growth.rate",
  "Tax.income.growth",
  "Percentage.Of.Poor.People"
)
old_par <- par(mfrow = c(2, 3))
for (k in seq_along(predictors)) {
  plot(
    df[[predictors[k]]], df$Economic.Growth,
    main = paste0("X", k, " vs Y"),
    xlab = paste0("X", k), ylab = "Y",
    col = "orange", pch = 19
  )
}
par(old_par)

# List the column names available for the model formula.
colnames(df)
## [1] "Human.Development.Index.Growth" "International.Tourist.Growth"
## [3] "Export.growth.rate" "Tax.income.growth"
## [5] "Percentage.Of.Poor.People" "Economic.Growth"
# Multiple linear regression of Economic.Growth on the five predictors,
# followed by the coefficient table and overall fit statistics.
model <- lm(
  formula = Economic.Growth ~
    Human.Development.Index.Growth +
    International.Tourist.Growth +
    Export.growth.rate +
    Tax.income.growth +
    Percentage.Of.Poor.People,
  data = df
)
summary(model)
##
## Call:
## lm(formula = Economic.Growth ~ Human.Development.Index.Growth +
## International.Tourist.Growth + Export.growth.rate + Tax.income.growth +
## Percentage.Of.Poor.People, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.167546 -0.006374 0.000095 0.007139 0.034350
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1005120 0.0003552 282.9 <2e-16 ***
## Human.Development.Index.Growth 0.9217852 0.0003609 2554.0 <2e-16 ***
## International.Tourist.Growth 0.8365333 0.0003437 2433.6 <2e-16 ***
## Export.growth.rate 0.4764290 0.0003543 1344.9 <2e-16 ***
## Tax.income.growth 0.6850274 0.0003583 1912.1 <2e-16 ***
## Percentage.Of.Poor.People 0.2000341 0.0003503 571.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0112 on 994 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 3.678e+06 on 5 and 994 DF, p-value: < 2.2e-16
# Pull the fitted values and residuals out of the model for the diagnostic
# steps that follow. NOTE: the vector is called `residuals`, the same name as
# the extractor function; later statements rely on this name.
fitted_values <- fitted.values(model)
residuals <- resid(model)

# One-sample t-test of whether the mean residual differs from zero.
t.test(x = residuals, mu = 0)
##
## One Sample t-test
##
## data: residuals
## t = 3.3909e-15, df = 999, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.0006934539 0.0006934539
## sample estimates:
## mean of x
## 1.198281e-18
# Constant-variance check: residuals against fitted values with a zero
# reference line, followed by the studentized Breusch-Pagan test.
plot(
  x = fitted_values, y = residuals,
  pch = 21, col = "grey3", bg = "orange3",
  xlab = "Fitted Values", ylab = "Residuals",
  main = "Residuals vs Fitted Values"
)
abline(col = "red", h = 0)

lmtest::bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 2.0994, df = 5, p-value = 0.8352
# Serial-correlation check: residuals in observation order with a dashed zero
# reference line, followed by the Durbin-Watson test (default alternative:
# autocorrelation greater than 0).
plot(
  residuals,
  main = "Autocorrelation Plot",
  xlab = "Observation", ylab = "Residuals",
  type = "p", pch = 21, col = "grey3", bg = "orange3"
)
abline(h = 0, lty = 2, col = "black")

lmtest::dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 2.0334, p-value = 0.7008
## alternative hypothesis: true autocorrelation is greater than 0
# Normality check: Kolmogorov-Smirnov test of the residuals against a normal
# distribution with the residuals' own mean and standard deviation.
# NOTE(review): the ks.test documentation requires the distribution parameters
# to be pre-specified, not estimated from the data being tested, so this
# p-value is only approximate. Consider shapiro.test(residuals) or a
# Lilliefors-corrected test as a cross-check.
ks.test(
  residuals, "pnorm",
  mean = mean(residuals),
  sd = sd(residuals)
)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: residuals
## D = 0.039375, p-value = 0.09002
## alternative hypothesis: two-sided
# Normal Q-Q plot of the residuals with a reference line.
qqnorm(
  residuals,
  col = "orange",
  main = "Q-Q Plot of Residuals"
)
qqline(residuals, col = "red3")

# Multicollinearity check: variance inflation factor of each predictor.
car::vif(model)
## Human.Development.Index.Growth International.Tourist.Growth
## 1.005458 1.004421
## Export.growth.rate Tax.income.growth
## 1.005052 1.008318
## Percentage.Of.Poor.People
## 1.004044