#Panel Data
#Load Packages
# Install only the packages that are not yet available, and always from an
# explicit CRAN mirror. Calling install.packages() without `repos` fails in
# non-interactive sessions (no mirror is set), and reinstalling every package
# on every run is slow and needs network access.
required_packages <- c(
  "plm", "AER", "ggplot2", "dplyr", "car",
  "tidyverse", "stargazer", "coefplot", "fixest"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "https://cran.rstudio.com/")
}
library(plm)
library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks plm::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks plm::lag(), stats::lag()
## ✖ dplyr::lead() masks plm::lead()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(coefplot)
library(fixest)
##
## Attaching package: 'fixest'
##
## The following object is masked from 'package:coefplot':
##
## coefplot
#Load Panel Data Set
# Stack every CSV file of the data directory into one data frame, padding
# columns that a file lacks with NA so that all files share the same layout.

# Return `data` restricted to / extended by `columns` (in that order); columns
# absent from `data` are created and filled with NA. Always a data frame.
align_to_columns <- function(data, columns) {
  data[setdiff(columns, colnames(data))] <- NA
  data[, columns, drop = FALSE]
}

# The directory name must match the one used for indicators.csv below
# ("archive (2)", with a space); the former spelling "archive(2)" matched no
# directory, so no file was found.
# `pattern` is a regular expression, not a glob, hence "\\.csv$".
file_list <- list.files(
  path = "/Users/asteriaxue/Documents/UCLA/R Studio /archive (2)",
  pattern = "\\.csv$",
  full.names = TRUE
)
if (length(file_list) == 0) {
  warning("No CSV files found in the data directory; `df` will be NULL.")
}
# Read each file once and reuse the result for both the column union and the
# row binding.
raw_tables <- lapply(file_list, read.csv)
all_columns <- unique(unlist(lapply(raw_tables, colnames)))
df_list <- lapply(raw_tables, align_to_columns, columns = all_columns)
df <- do.call(rbind, df_list)
head(df)
# NOTE: an earlier run printed NULL here because the misspelled directory
# yielded an empty file list; re-run to refresh the output.
# Read the indicators panel (one row per Country-Year pair).
# Plain `<-` is used: at top level `<<-` has no benefit and hides intent.
# The data frame is deliberately not attach()ed: the columns are renamed right
# below, and an attached copy would keep exposing the old, stale names.
indicators <- read.csv("/Users/asteriaxue/Documents/UCLA/R Studio /archive (2)/indicators.csv")
head(indicators)
## Country Year GDP..in.billion.USD. Inflation.Rate.... Unemployment.Rate....
## 1 USA 2010 15000 1.64 9.63
## 2 USA 2011 15500 3.16 8.94
## 3 USA 2012 16000 2.07 8.10
## 4 USA 2013 16500 1.50 7.70
## 5 USA 2014 17000 1.62 7.25
## 6 USA 2015 17500 0.12 5.32
## Economic.Growth....
## 1 2.55
## 2 1.53
## 3 2.28
## 4 1.84
## 5 2.53
## 6 3.08
# Rename columns
# Map the names generated by read.csv() to short identifiers
# (old name = new name), then apply the mapping one column at a time.
short_names <- c(
  "GDP..in.billion.USD." = "GDP",
  "Inflation.Rate...." = "inflation",
  "Unemployment.Rate...." = "unemployment",
  "Economic.Growth...." = "growth"
)
for (old_name in names(short_names)) {
  colnames(indicators)[colnames(indicators) == old_name] <- short_names[[old_name]]
}
# Declare the panel structure: Country is the individual index, Year the time
# index.
indicators.panel <- pdata.frame(indicators, index = c("Country", "Year"))
head(indicators)
## Country Year GDP inflation unemployment growth
## 1 USA 2010 15000 1.64 9.63 2.55
## 2 USA 2011 15500 3.16 8.94 1.53
## 3 USA 2012 16000 2.07 8.10 2.28
## 4 USA 2013 16500 1.50 7.70 1.84
## 5 USA 2014 17000 1.62 7.25 2.53
## 6 USA 2015 17500 0.12 5.32 3.08
# Density histogram of one variable, drawn separately for each level of
# `facet`, with a normal curve overlaid.
#
# data     data frame holding the variable and the facet column
# var      name (string) of the numeric column to plot
# facet    name (string) of the column used for faceting
# title    plot title
# x_label  x-axis label
# fill     fill colour of the histogram bars
#
# The normal curve uses the mean and standard deviation of the whole column
# (not of each facet), so the same reference curve appears in every panel.
# Returns the ggplot object; it is printed when called at top level.
plot_distribution <- function(data, var, facet, title, x_label,
                              fill = "#81D8D0") {
  values <- data[[var]]
  ggplot(data, aes(x = .data[[var]])) +
    # after_stat(density) replaces the deprecated `..density..` notation.
    geom_histogram(
      aes(y = after_stat(density)),
      bins = 30, fill = fill, color = "black", alpha = 0.7
    ) +
    facet_wrap(facet, scales = "free_y") +
    stat_function(
      fun = dnorm,
      args = list(mean = mean(values, na.rm = TRUE),
                  sd = sd(values, na.rm = TRUE)),
      color = "pink", linewidth = 1
    ) +
    labs(title = title, x = x_label, y = "Density") +
    theme_minimal()
}

# Make sure growth is numeric before it is plotted.
indicators$growth <- as.numeric(as.character(indicators$growth))

# Distributions by country
plot_distribution(indicators, "inflation", "Country",
                  "Distribution of Inflation Rate Across Countries",
                  "Inflation Rate")
plot_distribution(indicators, "unemployment", "Country",
                  "Distribution of Unemployment Rate Across Countries",
                  "Unemployment Rate")
# The original stacked two facet_wrap() calls here; only the last one
# (scales = "free_y") ever took effect, so that is the one kept.
plot_distribution(indicators, "GDP", "Country",
                  "Distribution of GDP in billions of USD Across Countries",
                  "GDP in Billions of USD")
plot_distribution(indicators, "growth", "Country",
                  "Distribution of Economic Growth Across Countries",
                  "Economic Growth")

# Distributions by year
plot_distribution(indicators, "inflation", "Year",
                  "Distribution of Inflation Rate Across Time (Year)",
                  "Inflation Rate", fill = "lightblue")
plot_distribution(indicators, "unemployment", "Year",
                  "Distribution of Unemployment Rate Across Time (Year)",
                  "Unemployment Rate")
# x labels fixed: these two plots were labelled "Unemployment Rate".
plot_distribution(indicators, "GDP", "Year",
                  "Distribution of GDP in billions of USD Across Time (Year)",
                  "GDP in Billions of USD")
plot_distribution(indicators, "growth", "Year",
                  "Distribution of Economic Growth Across Time (Year)",
                  "Economic Growth")
## Correlation Plots
# Install corrplot only when it is missing, from an explicit CRAN mirror
# (install.packages() without `repos` fails in non-interactive sessions).
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot", repos = "https://cran.rstudio.com/")
}
library(corrplot)
## corrplot 0.95 loaded
# Pairwise correlations of the four indicators, computed on the rows that are
# complete for all four columns.
indicators_subset <- indicators[, c("GDP", "inflation", "unemployment", "growth")]
correlation_matrix <- cor(indicators_subset, use = "complete.obs")
corrplot(
  correlation_matrix,
  method = "circle",
  type = "upper",
  col = colorRampPalette(c("#81D8D0", "white", "pink"))(200),
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)
## Box plot
# Columns are referenced through `indicators$...` instead of attach():
# attaching stacks a frozen copy of the data on the search path and only
# works here because it happens to mask lmtest's `unemployment` object.
boxplot(indicators$inflation, ylab = "Inflation Rate")
boxplot(indicators$unemployment, ylab = "Unemployment Rate")
boxplot(indicators$GDP, ylab = "GDP")
boxplot(indicators$growth, ylab = "Economics Growth")
### Box plot for economic growth: the median line lies above 0, so more than half of the country-year observations show positive economic growth across this time period. There are a few negative outliers (they could come from a single country or from one particular year). The interquartile range is centered, showing that most observations in this data set correspond to moderate growth.
# Scatterplots (ALL)

# One point per observation of `y_var` against Year, coloured by country.
scatter_over_time <- function(data, y_var, title, y_label) {
  ggplot(data, aes(x = Year, y = .data[[y_var]], color = Country)) +
    geom_point(size = 1) +
    labs(title = title, x = "Year", y = y_label) +
    theme_minimal()
}

# One point per observation of `y_var` against Country, coloured by year.
# Country labels are small, rotated and dodged over two rows to stay legible.
scatter_across_countries <- function(data, y_var, title, y_label) {
  ggplot(data, aes(x = Country, y = .data[[y_var]], color = Year)) +
    geom_point(size = 1) +
    labs(title = title, x = "Country", y = y_label) +
    theme_minimal() +
    scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
    theme(
      axis.text.x = element_text(size = 8, angle = 45, vjust = 0.5, hjust = 1)
    )
}

# OVER TIME
scatter_over_time(indicators, "inflation", "Inflation Over Time", "Inflation")
scatter_over_time(indicators, "GDP", "GDP Over Time", "GDP")
scatter_over_time(indicators, "growth", "Economic Growth Over Time",
                  "Economic Growth")

# ACROSS COUNTRIES
scatter_across_countries(indicators, "inflation", "Inflation Across Country",
                         "Inflation")
scatter_across_countries(indicators, "GDP", "GDP Across Country", "GDP")
scatter_across_countries(indicators, "unemployment",
                         "Unemployment Across Country", "Unemployment")
scatter_across_countries(indicators, "growth",
                         "Economic Growth Across Country", "Economic Growth")
# Five Number Summaries
# Across Years:
# Minimum, quartiles and maximum of inflation within each year.
summary_inflation <- summarise(
  group_by(indicators, Year),
  Min = min(inflation, na.rm = TRUE),
  Q1 = quantile(inflation, probs = 0.25, na.rm = TRUE),
  Median = median(inflation, na.rm = TRUE),
  Q3 = quantile(inflation, probs = 0.75, na.rm = TRUE),
  Max = max(inflation, na.rm = TRUE)
)
print(summary_inflation)
## # A tibble: 16 × 6
## Year Min Q1 Median Q3 Max
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010 -0.72 1.67 3.3 6.16 12.0
## 2 2011 0 3.08 4 6.3 226
## 3 2012 0.02 2 2.8 7.3 384
## 4 2013 0.2 1.35 2.5 7.1 592
## 5 2014 -0.2 1.56 2.5 6.7 767
## 6 2015 0 0.5 1.5 5.95 857
## 7 2016 -0.5 0.8 1.8 5.4 853
## 8 2017 0.6 1.7 2.7 3.9 315
## 9 2018 0.3 1.9 2.44 3.82 853
## 10 2019 0.3 1.35 1.9 4.85 119
## 11 2020 -1.2 0.55 1.25 5.45 717
## 12 2021 -0.1 1.75 3.1 5.4 186
## 13 2022 0.5 5.05 5.79 8 500
## 14 2023 0 4.03 5.4 6.1 400
## 15 2024 1.2 3.1 3.9 5.55 500
## 16 2025 0 2.65 2.9 4.95 550
# Minimum, quartiles and maximum of GDP within each year.
summary_GDP <- summarise(
  group_by(indicators, Year),
  Min = min(GDP, na.rm = TRUE),
  Q1 = quantile(GDP, probs = 0.25, na.rm = TRUE),
  Median = median(GDP, na.rm = TRUE),
  Q3 = quantile(GDP, probs = 0.75, na.rm = TRUE),
  Max = max(GDP, na.rm = TRUE)
)
print(summary_GDP)
## # A tibble: 16 × 6
## Year Min Q1 Median Q3 Max
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010 105 740 1500 2312. 15000
## 2 2011 1 682 1600 2554. 15500
## 3 2012 1 478 1700 2532. 16000
## 4 2013 1 526. 1750 2600 16500
## 5 2014 1 530 1800 2700 17000
## 6 2015 1 471 1750 2800 17500
## 7 2016 1 471 1800 2900 18000
## 8 2017 1 480 1850 3000 18500
## 9 2018 1 505 1900 3100 19000
## 10 2019 1 506. 1950 3200 19500
## 11 2020 1 491 2000 3558. 20000
## 12 2021 1 509 2050 3750 20500
## 13 2022 1 528 2100 4050 21000
## 14 2023 1 543 2150 4150 21500
## 15 2024 1 553 2200 4250 22000
## 16 2025 1 573 2300 4350 22500
# Minimum, quartiles and maximum of unemployment within each year.
summary_Unemployment <- summarise(
  group_by(indicators, Year),
  Min = min(unemployment, na.rm = TRUE),
  Q1 = quantile(unemployment, probs = 0.25, na.rm = TRUE),
  Median = median(unemployment, na.rm = TRUE),
  Q3 = quantile(unemployment, probs = 0.75, na.rm = TRUE),
  Max = max(unemployment, na.rm = TRUE)
)
print(summary_Unemployment)
## # A tibble: 16 × 6
## Year Min Q1 Median Q3 Max
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010 3.2 5.05 7.1 8.95 11.9
## 2 2011 3.1 4.85 6.5 8.6 10.5
## 3 2012 3 4.55 6 7.95 10.7
## 4 2013 2.9 4.85 6.4 7.55 12.2
## 5 2014 2.8 4.75 6 7.72 12.7
## 6 2015 3.1 4.6 5.6 7.25 11.9
## 7 2016 3.3 4.25 5 7.7 11.7
## 8 2017 3 4.1 5 7.9 12.7
## 9 2018 2.4 3.85 4.8 7.45 20.3
## 10 2019 2.4 3.7 4.5 7.2 11.9
## 11 2020 1.7 4.5 6.2 9.45 14.6
## 12 2021 1.6 4.3 5.2 8.05 36.1
## 13 2022 2.4 4.5 5.5 7 85.5
## 14 2023 2.3 4.4 5 6.7 65
## 15 2024 2.2 3.75 5 6.35 33
## 16 2025 2.1 3.55 5 6 15
# Minimum, quartiles and maximum of economic growth within each year.
summary_economicgrowth <- summarise(
  group_by(indicators, Year),
  Min = min(growth, na.rm = TRUE),
  Q1 = quantile(growth, probs = 0.25, na.rm = TRUE),
  Median = median(growth, na.rm = TRUE),
  Q3 = quantile(growth, probs = 0.75, na.rm = TRUE),
  Max = max(growth, na.rm = TRUE)
)
print(summary_economicgrowth)
## # A tibble: 16 × 6
## Year Min Q1 Median Q3 Max
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010 1.7 2.75 4.2 6.7 10.3
## 2 2011 -0.1 2.02 3.6 6.5 10.5
## 3 2012 -2.8 1.91 3.4 6.2 8.4
## 4 2013 -1.8 1.95 2.9 5.9 10
## 5 2014 -0.4 1.52 3.3 6.25 10
## 6 2015 -14 1.15 3.08 5.6 10.3
## 7 2016 -3.6 1.1 2.69 5.5 10.9
## 8 2017 0 1.8 2.9 5.7 10.8
## 9 2018 0 1.6 2.7 5.4 11.1
## 10 2019 0 1.3 2 4.8 13.7
## 11 2020 -9.8 -5.45 -3.42 -0.25 13.2
## 12 2021 0 4.15 5.7 6.95 12
## 13 2022 -2 2.50 3.2 5.95 10
## 14 2023 -0.5 1.75 2 4.8 9
## 15 2024 0 2.05 2.3 4.35 8
## 16 2025 0 2.5 2.6 4.15 7.9
# Across Countries:
# Minimum, quartiles and maximum of inflation within each country.
summary_inflation2 <- summarise(
  group_by(indicators, Country),
  Min = min(inflation, na.rm = TRUE),
  Q1 = quantile(inflation, probs = 0.25, na.rm = TRUE),
  Median = median(inflation, na.rm = TRUE),
  Q3 = quantile(inflation, probs = 0.75, na.rm = TRUE),
  Max = max(inflation, na.rm = TRUE)
)
print(summary_inflation2)
## # A tibble: 19 × 6
## Country Min Q1 Median Q3 Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia 0.9 1.8 2.3 2.92 6.1
## 2 Bangladesh 5 5.5 5.5 7.12 8.8
## 3 Brazil 3.2 3.74 5.82 6.35 10.1
## 4 Canada 0.7 1.48 1.9 2.97 6.8
## 5 China 0.9 1.95 2.3 2.65 5.4
## 6 France 0.1 0.8 1.55 2.42 6.1
## 7 Germany 0.3 1.05 1.85 3.55 7.1
## 8 India 3.4 4.97 5.4 6.93 12.0
## 9 Indonesia 3.8 38.5 102. 350. 550
## 10 Italy -0.2 0.175 1.6 3.08 8.7
## 11 Japan -0.72 0.015 0.45 0.95 2.4
## 12 Malaysia -1.2 1.67 2.1 3.05 3.8
## 13 Pakistan 3.8 6.22 9.8 12.7 25
## 14 Russia 2.9 4.68 5.85 7.62 15.5
## 15 Saudi Arabia 5.4 5.4 5.4 5.4 5.4
## 16 South Korea 0.4 1.23 1.95 2.82 5
## 17 Turkey 0 31.6 442 730. 857
## 18 UK 0 1.5 2.65 3.45 6
## 19 USA 0.12 1.59 2.1 3.6 8
# Minimum, quartiles and maximum of GDP within each country.
summary_GDP2 <- summarise(
  group_by(indicators, Country),
  Min = min(GDP, na.rm = TRUE),
  Q1 = quantile(GDP, probs = 0.25, na.rm = TRUE),
  Median = median(GDP, na.rm = TRUE),
  Q3 = quantile(GDP, probs = 0.75, na.rm = TRUE),
  Max = max(GDP, na.rm = TRUE)
)
print(summary_GDP2)
## # A tibble: 19 × 6
## Country Min Q1 Median Q3 Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia 1300 1488. 1675 1862. 2050
## 2 Bangladesh 105 142. 180 212. 250
## 3 Brazil 1800 2175 2275 2465 2600
## 4 Canada 1500 1688. 1875 2062. 2250
## 5 China 6700 7825 8950 10500 12000
## 6 France 2400 2775 3150 3525 3900
## 7 Germany 3300 3675 4050 4425 4800
## 8 India 1500 1875 2250 2625 3000
## 9 Indonesia 1 1 1 1 846
## 10 Italy 1848 2129. 2730 4125 5200
## 11 Japan 5500 5875 6250 6625 7000
## 12 Malaysia 236 296 325 382. 480
## 13 Pakistan 164 243 287 323 500
## 14 Russia 1500 1688. 1750 1925 2300
## 15 Saudi Arabia 434 646 646 646 746
## 16 South Korea 1000 1188. 1375 1562. 1750
## 17 Turkey 1 1 2.5 4.25 774
## 18 UK 2200 2575 2950 3325 3700
## 19 USA 15000 16875 18750 20625 22500
# Minimum, quartiles and maximum of unemployment within each country.
summary_Unemployment2 <- summarise(
  group_by(indicators, Country),
  Min = min(unemployment, na.rm = TRUE),
  Q1 = quantile(unemployment, probs = 0.25, na.rm = TRUE),
  Median = median(unemployment, na.rm = TRUE),
  Q3 = quantile(unemployment, probs = 0.75, na.rm = TRUE),
  Max = max(unemployment, na.rm = TRUE)
)
print(summary_Unemployment2)
## # A tibble: 19 × 6
## Country Min Q1 Median Q3 Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia 4.6 5 5.35 5.72 7
## 2 Bangladesh 4.5 4.5 4.5 4.5 4.5
## 3 Brazil 5.3 6.52 8.5 11.6 13.5
## 4 Canada 5.4 5.88 6.85 7.25 9.5
## 5 China 3.7 4 4.1 5.03 5.4
## 6 France 6.9 8.02 9.45 10 10.5
## 7 Germany 3 3.58 4.25 5.2 6.8
## 8 India 5 5.48 5.9 7.35 9.6
## 9 Indonesia 1.6 3.42 3.95 6.4 8.4
## 10 Italy 6.5 8.33 9.8 11.3 12.7
## 11 Japan 2.1 2.4 2.9 4.18 5.1
## 12 Malaysia 2.8 3.1 3.3 4.08 4.6
## 13 Pakistan 5 5 5 5.5 6.5
## 14 Russia 4.5 5.2 5.6 5.95 7.5
## 15 Saudi Arabia 10.5 10.5 10.5 10.5 10.5
## 16 South Korea 3 3.2 3.45 3.7 4
## 17 Turkey 6.2 8.43 11.9 23.5 85.5
## 18 UK 3.3 3.77 4.45 6.35 8
## 19 USA 3.7 4.45 5.36 7.8 9.63
# Minimum, quartiles and maximum of economic growth within each country.
summary_economicgrowth2 <- summarise(
  group_by(indicators, Country),
  Min = min(growth, na.rm = TRUE),
  Q1 = quantile(growth, probs = 0.25, na.rm = TRUE),
  Median = median(growth, na.rm = TRUE),
  Q3 = quantile(growth, probs = 0.75, na.rm = TRUE),
  Max = max(growth, na.rm = TRUE)
)
print(summary_economicgrowth2)
## # A tibble: 19 × 6
## Country Min Q1 Median Q3 Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia -2.1 2.45 2.85 3 4.4
## 2 Bangladesh 5.2 6.45 7 7.55 8.2
## 3 Brazil -3.9 0.875 1.96 2.52 7.53
## 4 Canada -5.3 1.78 2.1 2.7 4.6
## 5 China 2.3 4.05 6.8 7.72 10.3
## 6 France -7.8 0.875 1.4 2.15 7
## 7 Germany -4.9 2.5 3.1 3.6 4
## 8 India -7.3 6.18 6.9 7.45 9.3
## 9 Indonesia 4.5 5.45 5.9 6.2 7.1
## 10 Italy -8.9 0.125 0.95 1.63 6.6
## 11 Japan -4.8 0.95 1.35 1.8 4.2
## 12 Malaysia -5.6 4.45 4.9 5.67 8.7
## 13 Pakistan -0.5 3.27 3.9 5.4 6
## 14 Russia -3.7 -0.275 1.4 2.58 4.3
## 15 Saudi Arabia -14 0 0 4.78 10.5
## 16 South Korea -1 2.25 2.75 3.15 6.2
## 17 Turkey 7 9.15 9.95 11.0 13.7
## 18 UK -9.8 1.37 1.85 2.58 7.4
## 19 USA -3.42 2.32 2.58 2.9 5.92
## Data Visualization of Log-Transformed Variables
## Visualizing Predictors vs Inflation by Country and by Year
# Scatterplot of log(y_var) against log(inflation), coloured by a grouping
# column.
#
# data          data frame with `inflation`, `y_var` and `colour_var` columns
# y_var         name (string) of the column plotted on the y axis (logged)
# y_label       y-axis label
# colour_var    name (string) of the grouping column (converted to a factor)
# legend_title  title of the colour legend
#
# log() is only defined for positive values: zero gives -Inf and negative
# values give NaN plus a warning. Such rows cannot be drawn anyway, so they
# are dropped explicitly before plotting instead of being removed silently
# (with warnings) by ggplot2.
# NOTE: a group whose rows are all dropped no longer appears in the legend.
plot_log_scatter <- function(data, y_var, y_label, colour_var, legend_title) {
  is_positive <- !is.na(data$inflation) & data$inflation > 0 &
    !is.na(data[[y_var]]) & data[[y_var]] > 0
  plot_data <- data[is_positive, , drop = FALSE]
  ggplot(
    plot_data,
    aes(
      x = log(inflation),
      y = log(.data[[y_var]]),
      colour = factor(.data[[colour_var]])
    )
  ) +
    geom_point() +
    xlab("log(Inflation)") +
    ylab(y_label) +
    scale_color_discrete(name = legend_title)
}

### log(unemployment), log(GDP) and log(growth) vs log(inflation), by country
plot_log_scatter(indicators, "unemployment", "log(Unemployment)",
                 "Country", "Countries")
plot_log_scatter(indicators, "GDP", "log(GDP)", "Country", "Countries")
plot_log_scatter(indicators, "growth", "log(Growth)", "Country", "Countries")

### log(unemployment), log(GDP) and log(growth) vs log(inflation), by year
plot_log_scatter(indicators, "unemployment", "log(Unemployment)",
                 "Year", "Year")
plot_log_scatter(indicators, "GDP", "log(GDP)", "Year", "Year")
plot_log_scatter(indicators, "growth", "log(Growth)", "Year", "Year")
# No attach(indicators) here: every call below receives its data through an
# explicit `data =` argument (or a data frame as first argument), and each
# extra attach() only stacks another frozen copy of the columns on the search
# path, masking the previous ones.
library(plm)
library(AER)
# Panel version of the data: Country is the individual index, Year the time
# index.
indicators.panel.1 <- pdata.frame(indicators, index = c("Country", "Year"))
head(indicators.panel.1)
## Country Year GDP inflation unemployment growth
## Australia-2010 Australia 2010 1300 2.8 5.2 2.9
## Australia-2011 Australia 2011 1350 3.0 5.0 3.1
## Australia-2012 Australia 2012 1400 2.0 5.1 3.3
## Australia-2013 Australia 2013 1450 2.5 5.6 2.5
## Australia-2014 Australia 2014 1500 2.5 5.8 2.9
## Australia-2015 Australia 2015 1550 1.5 6.2 3.0
dim(indicators.panel.1)
## [1] 304 6
# Keep only rows where all four variables are strictly positive, because
# every one of them is log-transformed in the model below.
indicators.panel.1 <- subset(
  indicators.panel.1,
  inflation > 0 & GDP > 0 & unemployment > 0 & growth > 0
)
# Pooled OLS: a single intercept, ignoring the panel structure.
pooled_model <- plm(
  log(inflation) ~ log(GDP) + log(unemployment) + log(growth),
  data = indicators.panel.1,
  model = "pooling"
)
summary(pooled_model)
## Pooling Model
##
## Call:
## plm(formula = log(inflation) ~ log(GDP) + log(unemployment) +
## log(growth), data = indicators.panel.1, model = "pooling")
##
## Unbalanced Panel: n = 19, T = 5-16, N = 259
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -4.13513 -0.59341 0.10170 0.65212 2.23533
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 3.305377 0.361732 9.1376 < 2.2e-16 ***
## log(GDP) -0.471904 0.027331 -17.2664 < 2.2e-16 ***
## log(unemployment) 0.564432 0.133658 4.2230 3.358e-05 ***
## log(growth) 0.339132 0.089806 3.7763 0.0001981 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 717.61
## Residual Sum of Squares: 239.1
## R-Squared: 0.66682
## Adj. R-Squared: 0.6629
## F-statistic: 170.114 on 3 and 255 DF, p-value: < 2.22e-16
# Coefficient tests for the pooled model using an HC0 covariance matrix
# clustered by group (country). The argument is spelled `vcov.` in full
# rather than relying on partial matching of `vcov`.
clustered_vcov <- vcovHC(pooled_model, type = "HC0", cluster = "group")
coeftest(pooled_model, vcov. = clustered_vcov)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.305377 0.571738 5.7813 2.159e-08 ***
## log(GDP) -0.471904 0.051959 -9.0823 < 2.2e-16 ***
## log(unemployment) 0.564432 0.265011 2.1298 0.03414 *
## log(growth) 0.339132 0.159140 2.1310 0.03404 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(plm)
library(AER)
# Build the estimation sample for the fixed/random-effects models.
# All filtering is done on the plain data frame and the pdata.frame is created
# last, so that its Country/Year index is built from exactly the rows that
# remain. (Piping a pdata.frame through dplyr::filter() can leave the index
# attribute out of step with the data, because dplyr does not maintain it.)
model_vars <- c("inflation", "GDP", "unemployment", "growth")
# A row is usable when it has no missing value and every model variable is
# finite and strictly positive, i.e. its log is finite.
is_usable <- Reduce(
  `&`,
  lapply(indicators[model_vars], function(x) is.finite(x) & x > 0)
)
is_usable <- is_usable & complete.cases(indicators)
indicators.panel <- pdata.frame(
  indicators[is_usable, , drop = FALSE],
  index = c("Country", "Year")
)
# Fixed effects (within estimator) with country-specific intercepts.
fixed_effects_model_individual <- plm(
  log(inflation) ~ log(GDP) + log(unemployment) + log(growth),
  data = indicators.panel,
  effect = "individual",
  model = "within"
)
summary(fixed_effects_model_individual)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = log(inflation) ~ log(GDP) + log(unemployment) +
## log(growth), data = indicators.panel, effect = "individual",
## model = "within")
##
## Unbalanced Panel: n = 19, T = 5-16, N = 259
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.338241 -0.289432 0.010049 0.430274 1.808224
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## log(GDP) -0.457344 0.072235 -6.3313 1.207e-09 ***
## log(unemployment) -0.296021 0.189312 -1.5637 0.1192
## log(growth) 0.148903 0.111340 1.3374 0.1824
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 180.63
## Residual Sum of Squares: 150
## R-Squared: 0.16957
## Adj. R-Squared: 0.095989
## F-statistic: 16.1316 on 3 and 237 DF, p-value: 1.4169e-09
# Fixed effects (within estimator) with year-specific intercepts.
fixed_effects_model_time <- plm(
  log(inflation) ~ log(GDP) + log(unemployment) + log(growth),
  data = indicators.panel,
  effect = "time",
  model = "within"
)
summary(fixed_effects_model_time)
## Oneway (time) effect Within Model
##
## Call:
## plm(formula = log(inflation) ~ log(GDP) + log(unemployment) +
## log(growth), data = indicators.panel, effect = "time", model = "within")
##
## Unbalanced Panel: n = 19, T = 5-16, N = 259
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.9261041 -0.4602518 0.0046608 0.5650221 2.5065397
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## log(GDP) -0.485010 0.025082 -19.3368 < 2.2e-16 ***
## log(unemployment) 0.614846 0.122931 5.0016 1.099e-06 ***
## log(growth) 0.335060 0.084183 3.9801 9.128e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 663.55
## Residual Sum of Squares: 178.83
## R-Squared: 0.7305
## Adj. R-Squared: 0.71028
## F-statistic: 216.841 on 3 and 240 DF, p-value: < 2.22e-16
# Fixed effects (within estimator) with both country and year intercepts.
fixed_effects_model_full <- plm(
  log(inflation) ~ log(GDP) + log(unemployment) + log(growth),
  data = indicators.panel,
  effect = "twoways",
  model = "within"
)
summary(fixed_effects_model_full)
## Twoways effects Within Model
##
## Call:
## plm(formula = log(inflation) ~ log(GDP) + log(unemployment) +
## log(growth), data = indicators.panel, effect = "twoways",
## model = "within")
##
## Unbalanced Panel: n = 19, T = 5-16, N = 259
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.076123 -0.254986 0.051086 0.324675 1.462256
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## log(GDP) -0.572290 0.059778 -9.5735 <2e-16 ***
## log(unemployment) -0.240381 0.161034 -1.4927 0.1369
## log(growth) 0.044232 0.099582 0.4442 0.6573
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 130.79
## Residual Sum of Squares: 89.594
## R-Squared: 0.31499
## Adj. R-Squared: 0.20391
## F-statistic: 34.0278 on 3 and 222 DF, p-value: < 2.22e-16
# Random-effects model; `effect` is left at plm's default.
random_effects_model <- plm(
  log(inflation) ~ log(GDP) + log(unemployment) + log(growth),
  data = indicators.panel,
  model = "random"
)
summary(random_effects_model)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = log(inflation) ~ log(GDP) + log(unemployment) +
## log(growth), data = indicators.panel, model = "random")
##
## Unbalanced Panel: n = 19, T = 5-16, N = 259
##
## Effects:
## var std.dev share
## idiosyncratic 0.6329 0.7955 0.691
## individual 0.2831 0.5321 0.309
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4442 0.6170 0.6399 0.6274 0.6399 0.6499
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.8158 -0.3041 -0.0054 -0.0005 0.4604 1.6995
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 4.6001878 0.4829250 9.5257 < 2e-16 ***
## log(GDP) -0.4978827 0.0459727 -10.8300 < 2e-16 ***
## log(unemployment) -0.0066999 0.1662149 -0.0403 0.96785
## log(growth) 0.2166982 0.1016131 2.1326 0.03296 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 255.59
## Residual Sum of Squares: 166.38
## R-Squared: 0.34903
## Adj. R-Squared: 0.34137
## Chisq: 137.25 on 3 DF, p-value: < 2.22e-16
# random_effects_model was fitted just above with exactly this formula, data
# and model type, so the identical second fit that used to sit here was
# removed.
## Testing both effects jointly
# NOTE(review): the Hausman test is normally used to compare fixed with random
# effects; the pFtest() calls further down are the usual comparison of a
# within model with the pooled model.
phtest(fixed_effects_model_full, pooled_model)
##
## Hausman Test
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## chisq = 142.9, df = 3, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
## Only individual (country) effect
# Hausman-type comparison of the country fixed-effects and pooled estimates.
phtest(x = fixed_effects_model_individual, x2 = pooled_model)
##
## Hausman Test
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## chisq = 48.066, df = 3, p-value = 2.062e-10
## alternative hypothesis: one model is inconsistent
## Only time effect
# Hausman-type comparison of the year fixed-effects and pooled estimates.
phtest(x = fixed_effects_model_time, x2 = pooled_model)
##
## Hausman Test
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## chisq = 2.6949, df = 3, p-value = 0.4411
## alternative hypothesis: one model is inconsistent
## FE vs RE
# The Hausman test should compare two estimators of the same effects
# structure. random_effects_model is a one-way individual (country) effects
# model, so it is compared with the individual fixed-effects model; the
# two-way model used before also differs by its time effects, which muddles
# the comparison.
phtest(fixed_effects_model_individual, random_effects_model)
# NOTE: re-run to print the test statistics. The output previously recorded
# here (chisq = 102.53, df = 3) came from the two-way FE model and does not
# apply to this comparison.
## Pooled vs RE
# Hausman-type comparison of the random-effects and pooled estimates.
# NOTE(review): the Breusch-Pagan LM test (plmtest, below) is the usual test
# for random effects against pooling.
phtest(x = random_effects_model, x2 = pooled_model)
##
## Hausman Test
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## chisq = 39.961, df = 3, p-value = 1.086e-08
## alternative hypothesis: one model is inconsistent
# Comparison with pFtest ----
# F tests of each fixed-effects specification against the pooled model.
# Lines starting with `##` directly below a call are the recorded console
# output of that call.

# Full fixed-effects model (reported as "twoways effects" in the output)
pFtest(fixed_effects_model_full, pooled_model)
##
## F test for twoways effects
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## F = 11.226, df1 = 33, df2 = 222, p-value < 2.2e-16
## alternative hypothesis: significant effects

# Individual (country) effects only
pFtest(fixed_effects_model_individual, pooled_model)
##
## F test for individual effects
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## F = 7.8213, df1 = 18, df2 = 237, p-value = 5.484e-16
## alternative hypothesis: significant effects

# Time effects only
pFtest(fixed_effects_model_time, pooled_model)
##
## F test for time effects
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## F = 5.3921, df1 = 15, df2 = 240, p-value = 2.377e-09
## alternative hypothesis: significant effects

# Breusch-Pagan Lagrange multiplier test on the random-effects fit
plmtest(random_effects_model, type = "bp")
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: log(inflation) ~ log(GDP) + log(unemployment) + log(growth)
## chisq = 109.66, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Data cleanup ----
# Non-positive inflation values become NA; the remaining values are shifted
# up by 0.0001.
indicators <- indicators %>%
  mutate(inflation = ifelse(inflation <= 0, NA, inflation + 0.0001))

# Copy of the rows whose inflation value is present and finite.
# NOTE(review): `cators_clean` looks like a truncated `indicators_clean` and is
# not referenced again in this section - confirm the intended name and use.
cators_clean <- indicators %>%
  filter(!is.na(inflation), !is.infinite(inflation), !is.nan(inflation))

# Keep only rows where all four indicators are strictly positive (each of them
# is passed through log() in the plots below).
indicators <- indicators %>%
  filter(inflation > 0, unemployment > 0, GDP > 0, growth > 0)

# Treat Country as a categorical variable and drop rows where it is missing.
indicators[["Country"]] <- as.factor(indicators[["Country"]])
indicators <- indicators %>%
  filter(!is.na(Country), !is.infinite(Country))
# Heterogeneity plots ----
# Each logged indicator is plotted first against Year, then against Country.
# Lines starting with `## [` below a call are the recorded console output of
# that call.

# Across time
scatterplot(
  log(inflation) ~ Year,
  data = indicators,
  main = "Heterogeneity of Inflation across Time"
)
scatterplot(
  log(unemployment) ~ Year,
  data = indicators,
  main = "Heterogeneity of Unemployment across Time"
)
scatterplot(
  log(GDP) ~ Year,
  data = indicators,
  main = "Heterogeneity of GDP across Time"
)
scatterplot(
  log(growth) ~ Year,
  data = indicators,
  main = "Heterogeneity of Economics Growth across Time"
)

# Across countries
scatterplot(
  log(inflation) ~ Country,
  data = indicators,
  main = "Heterogeneity of Inflation across Country"
)
## [1] "125" "27" "17" "131" "132" "32" "238" "239" "106" "150" "217" "226"
## [13] "6"
scatterplot(
  log(unemployment) ~ Country,
  data = indicators,
  main = "Heterogeneity of Unemployment across Country"
)
## [1] "248" "109" "102" "228"
scatterplot(
  log(GDP) ~ Country,
  data = indicators,
  main = "Heterogeneity of GDP across Country"
)
## [1] "201" "202" "111" "112" "217"
scatterplot(
  log(growth) ~ Country,
  data = indicators,
  main = "Heterogeneity of Economics Growth across Country"
)
## [1] "125" "123" "130" "136" "180" "34" "240" "241" "141" "229" "226" "227"
## [13] "75" "82" "2" "4" "11"
# Fixed vs. random effects: grouped bar chart of coefficient estimates ----
fixed_effects_coef <- coef(fixed_effects_model_full)
random_effects_coef <- coef(random_effects_model)

# One long-format table per model: predictor name, estimate, model label.
fixed_effects_data <- data.frame(
  predictor = names(fixed_effects_coef),
  coefficient = fixed_effects_coef,
  model = "Fixed Effects"
)
random_effects_data <- data.frame(
  predictor = names(random_effects_coef),
  coefficient = random_effects_coef,
  model = "Random Effects"
)
coef_data <- rbind(fixed_effects_data, random_effects_data)

# Bars are dodged so both models sit side by side for each predictor; the
# axes are flipped so the predictor names read horizontally.
ggplot(
  coef_data,
  aes(x = reorder(predictor, coefficient), y = coefficient, fill = model)
) +
  geom_col(position = "dodge", width = 0.5) +
  coord_flip() +
  scale_fill_manual(
    values = c("Fixed Effects" = "steelblue", "Random Effects" = "orange")
  ) +
  labs(
    x = "Predictor",
    y = "Coefficient Estimate",
    title = "Comparison of Coefficients: Fixed Effects vs. Random Effects"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
# Collect a fitted model's coefficient estimates into a two-column data frame.
#
# model: a fitted model whose summary() has a `coefficients` matrix with one
#        row per term and the estimates in its first column.
# Returns a data.frame with `predictor` (the matrix row names) and
# `coefficient` (the first column of the matrix).
extract_coefficients <- function(model) {
  coef_table <- summary(model)$coefficients
  estimates <- coef_table[, 1]
  data.frame(
    predictor = rownames(coef_table),
    coefficient = estimates
  )
}
# Coefficient tables for the three fixed-effects specifications
fixed_effects_data_full <- extract_coefficients(fixed_effects_model_full)
fixed_effects_data_time <- extract_coefficients(fixed_effects_model_time)
fixed_effects_data_individual <- extract_coefficients(
  fixed_effects_model_individual
)

# One horizontal bar chart per specification; predictors are ordered by their
# estimate.

# Full model
ggplot(
  fixed_effects_data_full,
  aes(x = reorder(predictor, coefficient), y = coefficient)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    x = "Predictor",
    y = "Coefficient Estimate",
    title = "Fixed Effects Full Model Coefficients"
  ) +
  theme_minimal()

# Time model
ggplot(
  fixed_effects_data_time,
  aes(x = reorder(predictor, coefficient), y = coefficient)
) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(
    x = "Predictor",
    y = "Coefficient Estimate",
    title = "Fixed Effects Time Model Coefficients"
  ) +
  theme_minimal()

# Individual model
ggplot(
  fixed_effects_data_individual,
  aes(x = reorder(predictor, coefficient), y = coefficient)
) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(
    x = "Predictor",
    y = "Coefficient Estimate",
    title = "Fixed Effects Individual Model Coefficients"
  ) +
  theme_minimal()
### Individual fixed-effects model: the log(GDP) coefficient remains negative
### and is similar in magnitude to the full model. The coefficient for
### log(unemployment) is also negative and slightly stronger than in the full
### model. The log(growth) coefficient remains positive and is comparable in
### magnitude to the other models. This suggests that, in the individual
### fixed-effects model, both GDP and unemployment have a strong negative
### impact on the dependent variable.
# Horizontal bar chart of the random-effects estimates, ordered by value
ggplot(
  random_effects_data,
  aes(x = reorder(predictor, coefficient), y = coefficient)
) +
  geom_col(fill = "orange") +
  coord_flip() +
  labs(
    x = "Predictor",
    y = "Coefficient Estimate",
    title = "Random Effects Model Coefficients"
  ) +
  theme_minimal()
library(ggplot2)
# Build a coefficient table for a model that is referenced by name.
#
# model.obj: character string naming a fitted model object; the object is
#            looked up with get().
# Returns a data.frame with one row per coefficient and the columns
#   Estimate - first column of the summary coefficient matrix
#   SE       - second column of the summary coefficient matrix
#   vars     - row names of that matrix
#   model    - the name passed in as `model.obj`
ce <- function(model.obj) {
  coef_matrix <- summary(get(model.obj))$coefficients
  data.frame(
    Estimate = coef_matrix[, 1],
    SE = coef_matrix[, 2],
    vars = row.names(coef_matrix),
    model = model.obj
  )
}
# Coefficient plot: fixed- vs random-effects estimates with +/- 1 SE bars ----

# Stack the per-model coefficient tables produced by ce(). The list is named
# by model so the row names of the combined table keep the model prefix.
model_names <- c("fixed_effects_model_full", "random_effects_model")
coefs <- do.call(rbind, lapply(setNames(model_names, model_names), ce))

# Rename the standard-error column by name rather than by position, so the
# rename still hits the right column if ce() ever reorders its output.
names(coefs)[names(coefs) == "SE"] <- "se"

gg_coef <- ggplot(coefs, aes(vars, Estimate)) +
  # Zero reference line
  geom_hline(yintercept = 0, lty = 1, lwd = 0.5, colour = "red") +
  # Whiskers span one standard error either side of the estimate
  geom_errorbar(
    aes(ymin = Estimate - se, ymax = Estimate + se, colour = vars),
    lwd = 1,
    width = 0
  ) +
  geom_point(size = 3, aes(colour = vars)) +
  # One panel per model
  facet_grid(model ~ ., scales = "free") +
  coord_flip() +
  # Hide the colour legend. "none" replaces the deprecated `FALSE`, which
  # emits a deprecation warning in ggplot2 >= 3.3.4.
  guides(colour = "none") +
  labs(x = "Coefficient", y = "Value") +
  ggtitle("Model Coefficients")

print(gg_coef)