R Markdown
This is an R Markdown document. Markdown is a simple formatting
syntax for authoring HTML, PDF, and MS Word documents. For more details
on using R Markdown see http://rmarkdown.rstudio.com.
# Install Library
##install.packages("GGally")
##install.packages("corrplot")
##install.packages("Hmisc")
# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(httr)
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.5.1
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.1
## corrplot 0.95 loaded
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.5.1
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(dplyr)
library(knitr)
library(broom)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
## Pengumpulan Data dari World Bank API
indicators <- list(
gdp_pc = "NY.GDP.PCAP.CD",
rnd_exp = "GB.XPD.RSDV.GD.ZS",
edu_exp = "SE.XPD.TOTL.GD.ZS",
acc_own = "FX.OWN.TOTL.ZS"
)
get_data <- function(indicator) {
url <- paste0("https://api.worldbank.org/v2/countries/all/indicators/",
indicator, "?format=json&per_page=20000")
res <- fromJSON(content(GET(url), "text"), flatten = TRUE)
data <- res[[2]] %>%
as_tibble() %>%
transmute(
country = country.value,
iso3c = country.id,
year = as.integer(date),
value = value
)
return(data)
}
data_list <- lapply(indicators, get_data)
names(data_list) <- names(indicators)
## Pembersihan dan Penggabungan Data
data_cleaned <- reduce(data_list, function(x, y) {
full_join(x, y, by = c("country", "iso3c", "year"))
})
colnames(data_cleaned) <- c("country", "iso3c", "year", "gdp_pc", "rnd_exp", "edu_exp", "acc_own")
## Hapus NA dan data duplikat
final_data <- data_cleaned %>%
filter(!is.na(gdp_pc) & !is.na(rnd_exp) & !is.na(edu_exp) & !is.na(acc_own)) %>%
distinct()
## Analisis Korelasi
### Transformasi log GDP per kapita
final_data <- final_data %>%
mutate(log_gdp_pc = log(gdp_pc))
## Analisis Korelasi
### Korelasi dan Visualisasi
vars <- final_data %>%
select(log_gdp_pc, rnd_exp, edu_exp, acc_own)
# Korelasi Pearson
cor_matrix <- rcorr(as.matrix(vars), type = "pearson")
# Heatmap
corrplot(cor_matrix$r, method = "color", type = "upper", tl.cex = 0.8, addCoef.col = "black")

## Analisis Korelasi
### Scatter Plot dengan Garis Tren
plot_list <- list(
ggplot(final_data, aes(x = rnd_exp, y = log_gdp_pc)) + geom_point() + geom_smooth(method = "lm") + ggtitle("R&D vs Log(GDP per capita)"),
ggplot(final_data, aes(x = edu_exp, y = log_gdp_pc)) + geom_point() + geom_smooth(method = "lm") + ggtitle("Education vs Log(GDP per capita)"),
ggplot(final_data, aes(x = acc_own, y = log_gdp_pc)) + geom_point() + geom_smooth(method = "lm") + ggtitle("Account Ownership vs Log(GDP per capita)")
)
print(plot_list[[1]])
## `geom_smooth()` using formula = 'y ~ x'

print(plot_list[[2]])
## `geom_smooth()` using formula = 'y ~ x'

print(plot_list[[3]])
## `geom_smooth()` using formula = 'y ~ x'

## Analisis Korelasi
### Uji Signifikansi Korelasi
kable(cor_matrix$P, caption = "P-value Matrix untuk Uji Signifikansi Korelasi")
P-value Matrix untuk Uji Signifikansi Korelasi
| log_gdp_pc |
NA |
0 |
0 |
0 |
| rnd_exp |
0 |
NA |
0 |
0 |
| edu_exp |
0 |
0 |
NA |
0 |
| acc_own |
0 |
0 |
0 |
NA |
## Analisis Temuan
### Variabel dengan Dampak Terbesar
model <- lm(log_gdp_pc ~ rnd_exp + edu_exp + acc_own, data = final_data)
summary(model)
##
## Call:
## lm(formula = log_gdp_pc ~ rnd_exp + edu_exp + acc_own, data = final_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.96257 -0.40599 0.04058 0.39261 1.86672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.743688 0.147812 45.624 < 2e-16 ***
## rnd_exp 0.263443 0.043609 6.041 3.9e-09 ***
## edu_exp 0.053997 0.029444 1.834 0.0675 .
## acc_own 0.029876 0.001736 17.208 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6448 on 352 degrees of freedom
## Multiple R-squared: 0.7174, Adjusted R-squared: 0.7149
## F-statistic: 297.8 on 3 and 352 DF, p-value: < 2.2e-16
# Tampilkan hasil regresi dalam bentuk tabel
reg_table <- tidy(model, conf.int = TRUE)
kable(reg_table, caption = "Hasil Regresi Linear: log(GDP per capita) terhadap Variabel Sosial Ekonomi (dengan Confidence Interval)")
Hasil Regresi Linear: log(GDP per capita) terhadap Variabel
Sosial Ekonomi (dengan Confidence Interval)
| (Intercept) |
6.7436881 |
0.1478116 |
45.623538 |
0.0000000 |
6.4529832 |
7.0343931 |
| rnd_exp |
0.2634431 |
0.0436094 |
6.040966 |
0.0000000 |
0.1776753 |
0.3492109 |
| edu_exp |
0.0539973 |
0.0294436 |
1.833922 |
0.0675096 |
-0.0039102 |
0.1119048 |
| acc_own |
0.0298764 |
0.0017362 |
17.207869 |
0.0000000 |
0.0264617 |
0.0332910 |
# Nilai R-squared dan uji F
model_summary <- summary(model)
r_squared <- model_summary$r.squared
f_statistic <- model_summary$fstatistic[1]
df1 <- model_summary$fstatistic[2]
df2 <- model_summary$fstatistic[3]
p_value <- pf(f_statistic, df1, df2, lower.tail = FALSE)
# VIF (Multikolinearitas)
vif_values <- vif(model)
kable(as.data.frame(vif_values), caption = "Nilai Variance Inflation Factor (VIF) untuk Setiap Variabel Independen")
Nilai Variance Inflation Factor (VIF) untuk Setiap Variabel
Independen
| rnd_exp |
1.815551 |
| edu_exp |
1.169495 |
| acc_own |
1.802186 |
# tabel hasil statistik model
table_stat <- tibble(
`R-squared` = r_squared,
`F-statistic` = f_statistic,
`df1` = df1,
`df2` = df2,
`p-value` = p_value
)
kable(table_stat, caption = "Ringkasan Statistik Model Regresi")
Ringkasan Statistik Model Regresi
| 0.7173527 |
297.7895 |
3 |
352 |
0 |
## Analisis Temuan
### Perbandingan untuk 10 Negara dengan GDP per kapita Tertinggi
top10_countries <- final_data %>%
group_by(country) %>%
filter(year == max(year)) %>%
arrange(desc(gdp_pc)) %>%
head(10)
kable(top10_countries[, c("country", "year", "gdp_pc", "rnd_exp", "edu_exp", "acc_own")])
| Luxembourg |
2017 |
110193.21 |
1.23898 |
3.486460 |
98.77 |
| Ireland |
2021 |
103961.98 |
1.11065 |
3.005980 |
99.66 |
| Switzerland |
2021 |
93664.77 |
3.30645 |
5.037090 |
99.49 |
| Norway |
2021 |
93072.89 |
1.88764 |
6.960220 |
99.48 |
| United States |
2021 |
71307.40 |
3.48313 |
5.428300 |
94.95 |
| Denmark |
2021 |
69727.99 |
2.76142 |
6.999930 |
100.00 |
| North America |
2021 |
69412.91 |
3.35576 |
5.088450 |
95.45 |
| Iceland |
2021 |
69178.40 |
2.77875 |
8.217810 |
100.00 |
| Singapore |
2017 |
61236.25 |
1.89913 |
2.756848 |
97.93 |
| Sweden |
2021 |
61174.97 |
3.40216 |
7.572200 |
99.69 |
## Analisis Temuan
### Korelasi antar Variabel Sosial Ekonomi
vars2 <- top10_countries %>%
ungroup() %>%
select(rnd_exp, edu_exp, acc_own) %>%
mutate(across(everything(), as.numeric))
corrplot(cor(vars2, use = "pairwise.complete.obs"), method = "number")
