R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

# Install Library
##install.packages("GGally")
##install.packages("corrplot")
##install.packages("Hmisc")
# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(httr)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:purrr':
## 
##     flatten
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.5.1
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.1
## corrplot 0.95 loaded
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.5.1
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(dplyr)
library(knitr)
library(broom)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## Pengumpulan Data dari World Bank API
# World Bank indicator codes, keyed by the short column name used for each
# series throughout the analysis.
indicators <- as.list(c(
  gdp_pc  = "NY.GDP.PCAP.CD",     # GDP per capita (log-transformed later)
  rnd_exp = "GB.XPD.RSDV.GD.ZS",  # labelled "R&D" in the plots
  edu_exp = "SE.XPD.TOTL.GD.ZS",  # labelled "Education" in the plots
  acc_own = "FX.OWN.TOTL.ZS"      # labelled "Account Ownership" in the plots
))

# Download one World Bank indicator for all economies and years.
#
# Args:
#   indicator: World Bank indicator code as a string, e.g. "NY.GDP.PCAP.CD".
#
# Returns:
#   A tibble with columns `country` (name), `iso3c` (ISO 3166-1 alpha-3 code),
#   `year` (integer) and `value` (the indicator value; NA where unreported).
#
# Errors:
#   Stops when the HTTP request fails or when the API answers without a data
#   table (e.g. an unknown indicator code). Warns when the reply spans more
#   than one page, because only the first page is read.
get_data <- function(indicator) {
  url <- paste0("https://api.worldbank.org/v2/countries/all/indicators/",
                indicator, "?format=json&per_page=20000")

  response <- GET(url)
  # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
  stop_for_status(response)

  # Name the encoding explicitly so httr does not have to guess it.
  res <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE)

  # A successful reply has two elements: paging metadata, then the records.
  if (length(res) < 2 || !is.data.frame(res[[2]])) {
    stop("World Bank API returned no data for indicator '", indicator, "'",
         call. = FALSE)
  }

  # `per_page` is fixed in the URL; make truncation visible rather than silent.
  pages <- res[[1]][["pages"]]
  if (!is.null(pages) && isTRUE(pages > 1)) {
    warning("Indicator '", indicator, "' spans ", pages,
            " pages; only the first page was read", call. = FALSE)
  }

  res[[2]] %>%
    as_tibble() %>%
    transmute(
      country = country.value,
      # `country.id` holds the 2-letter code; the ISO3 code is a separate field.
      iso3c = countryiso3code,
      year = as.integer(date),
      value = value
    )
}

# Fetch every indicator; the resulting list carries the names of `indicators`.
data_list <- map(indicators, get_data)
## Pembersihan dan Penggabungan Data
# Merge all indicator tables into one wide table keyed by country and year.
# Each table's `value` column is renamed to its indicator name *before* the
# join, so the result never depends on how dplyr suffixes clashing column
# names, nor on the number or order of indicators.
data_cleaned <- data_list %>%
  imap(function(indicator_data, indicator_name) {
    rename(indicator_data, !!indicator_name := value)
  }) %>%
  reduce(full_join, by = c("country", "iso3c", "year"))

# Drop rows with a missing value in any indicator, then remove duplicate rows.
final_data <- data_cleaned %>%
  drop_na(gdp_pc, rnd_exp, edu_exp, acc_own) %>%
  distinct()
## Analisis Korelasi
### Transformasi log GDP per kapita

# Add the natural logarithm of GDP per capita as a new column `log_gdp_pc`.
final_data <- mutate(final_data, log_gdp_pc = log(gdp_pc))
## Analisis Korelasi
### Korelasi dan Visualisasi

# Columns that enter the correlation analysis.
vars <- select(final_data, log_gdp_pc, rnd_exp, edu_exp, acc_own)

# Pearson correlations; the result's `r` and `P` components are used below.
cor_matrix <- vars %>%
  as.matrix() %>%
  rcorr(type = "pearson")

# Heatmap of the coefficients: upper triangle only, values printed in black.
corrplot(
  cor_matrix$r,
  method = "color",
  type = "upper",
  tl.cex = 0.8,
  addCoef.col = "black"
)

## Analisis Korelasi
### Scatter Plot dengan Garis Tren
# One scatter plot per predictor against log GDP per capita, each with a
# fitted linear trend line.
plot_list <- list(
  # R&D
  ggplot(final_data, aes(x = rnd_exp, y = log_gdp_pc)) +
    geom_point() +
    geom_smooth(method = "lm") +
    ggtitle("R&D vs Log(GDP per capita)"),
  # Education
  ggplot(final_data, aes(x = edu_exp, y = log_gdp_pc)) +
    geom_point() +
    geom_smooth(method = "lm") +
    ggtitle("Education vs Log(GDP per capita)"),
  # Account ownership
  ggplot(final_data, aes(x = acc_own, y = log_gdp_pc)) +
    geom_point() +
    geom_smooth(method = "lm") +
    ggtitle("Account Ownership vs Log(GDP per capita)")
)

# Draw every scatter plot in list order.
walk(plot_list, print)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## Analisis Korelasi
### Uji Signifikansi Korelasi
# Table of the pairwise p-values returned alongside the correlations.
cor_matrix[["P"]] %>%
  kable(caption = "P-value Matrix untuk Uji Signifikansi Korelasi")
P-value Matrix untuk Uji Signifikansi Korelasi
log_gdp_pc rnd_exp edu_exp acc_own
log_gdp_pc NA 0 0 0
rnd_exp 0 NA 0 0
edu_exp 0 0 NA 0
acc_own 0 0 0 NA
## Analisis Temuan
### Variabel dengan Dampak Terbesar
# Fit a linear model of log GDP per capita on the three socio-economic
# indicators, then print its coefficient summary.
# The call is left exactly as written because summary() prints the call in
# its "Call:" section.
model <- lm(log_gdp_pc ~ rnd_exp + edu_exp + acc_own, data = final_data)
summary(model)
## 
## Call:
## lm(formula = log_gdp_pc ~ rnd_exp + edu_exp + acc_own, data = final_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.96257 -0.40599  0.04058  0.39261  1.86672 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.743688   0.147812  45.624  < 2e-16 ***
## rnd_exp     0.263443   0.043609   6.041  3.9e-09 ***
## edu_exp     0.053997   0.029444   1.834   0.0675 .  
## acc_own     0.029876   0.001736  17.208  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6448 on 352 degrees of freedom
## Multiple R-squared:  0.7174, Adjusted R-squared:  0.7149 
## F-statistic: 297.8 on 3 and 352 DF,  p-value: < 2.2e-16
# Show the regression results as a table, one row per term, including the
# confidence-interval bounds (`conf.low`, `conf.high`).
reg_table <- model %>%
  tidy(conf.int = TRUE)
reg_table %>%
  kable(caption = "Hasil Regresi Linear: log(GDP per capita) terhadap Variabel Sosial Ekonomi (dengan Confidence Interval)")
Hasil Regresi Linear: log(GDP per capita) terhadap Variabel Sosial Ekonomi (dengan Confidence Interval)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 6.7436881 0.1478116 45.623538 0.0000000 6.4529832 7.0343931
rnd_exp 0.2634431 0.0436094 6.040966 0.0000000 0.1776753 0.3492109
edu_exp 0.0539973 0.0294436 1.833922 0.0675096 -0.0039102 0.1119048
acc_own 0.0298764 0.0017362 17.207869 0.0000000 0.0264617 0.0332910
# R-squared and overall F-test of the fitted model.
model_summary <- summary(model)
r_squared <- model_summary[["r.squared"]]
# `fstatistic` is a named vector: the statistic, then numerator and
# denominator degrees of freedom.
f_statistic <- model_summary$fstatistic["value"]
df1 <- model_summary$fstatistic["numdf"]
df2 <- model_summary$fstatistic["dendf"]
# Upper-tail probability of the F distribution, i.e. the overall p-value.
p_value <- pf(f_statistic, df1, df2, lower.tail = FALSE)

# VIF (multicollinearity): variance inflation factor for each predictor in
# `model`.
vif_values <- vif(model)
# as.data.frame() is called directly on `vif_values`; the rendered table's
# column header is "vif_values", presumably derived from this variable name,
# so keep the call un-piped (TODO confirm before restructuring).
kable(as.data.frame(vif_values), caption = "Nilai Variance Inflation Factor (VIF) untuk Setiap Variabel Independen")
Nilai Variance Inflation Factor (VIF) untuk Setiap Variabel Independen
vif_values
rnd_exp 1.815551
edu_exp 1.169495
acc_own 1.802186
# One-row table of the model-level statistics computed above.
table_stat <- tibble(
  "R-squared" = r_squared,
  "F-statistic" = f_statistic,
  df1 = df1,
  df2 = df2,
  "p-value" = p_value
)
table_stat %>%
  kable(caption = "Ringkasan Statistik Model Regresi")
Ringkasan Statistik Model Regresi
R-squared F-statistic df1 df2 p-value
0.7173527 297.7895 3 352 0
## Analisis Temuan
### Perbandingan untuk 10 Negara dengan GDP per kapita Tertinggi
# Keep each entity's most recent complete observation, then take the ten with
# the highest GDP per capita. `.by` groups only for the filter step, so the
# result is a plain (ungrouped) tibble.
# NOTE(review): the data comes from the `countries/all` endpoint and nothing
# upstream removes regional aggregates, so entries such as "North America"
# can appear in this ranking alongside individual countries.
top10_countries <- final_data %>%
  filter(year == max(year), .by = country) %>%
  arrange(desc(gdp_pc)) %>%
  slice_head(n = 10)

top10_countries %>%
  select(country, year, gdp_pc, rnd_exp, edu_exp, acc_own) %>%
  kable()
country year gdp_pc rnd_exp edu_exp acc_own
Luxembourg 2017 110193.21 1.23898 3.486460 98.77
Ireland 2021 103961.98 1.11065 3.005980 99.66
Switzerland 2021 93664.77 3.30645 5.037090 99.49
Norway 2021 93072.89 1.88764 6.960220 99.48
United States 2021 71307.40 3.48313 5.428300 94.95
Denmark 2021 69727.99 2.76142 6.999930 100.00
North America 2021 69412.91 3.35576 5.088450 95.45
Iceland 2021 69178.40 2.77875 8.217810 100.00
Singapore 2017 61236.25 1.89913 2.756848 97.93
Sweden 2021 61174.97 3.40216 7.572200 99.69
## Analisis Temuan
### Korelasi antar Variabel Sosial Ekonomi
# Numeric predictor columns of the top-10 table (grouping removed first so
# that only the three selected columns remain).
vars2 <- ungroup(top10_countries) %>%
  select(rnd_exp, edu_exp, acc_own) %>%
  mutate(across(everything(), as.numeric))

# Pairwise correlations among the predictors, drawn as numbers.
vars2 %>%
  cor(use = "pairwise.complete.obs") %>%
  corrplot(method = "number")